36
36
import org .apache .seatunnel .common .constants .PluginType ;
37
37
import org .apache .seatunnel .common .utils .FileUtils ;
38
38
import org .apache .seatunnel .common .utils .ReflectionUtils ;
39
+ import org .apache .seatunnel .common .utils .SeaTunnelException ;
39
40
40
41
import org .apache .commons .lang3 .ArrayUtils ;
41
42
import org .apache .commons .lang3 .StringUtils ;
44
45
import lombok .extern .slf4j .Slf4j ;
45
46
46
47
import java .io .File ;
47
- import java .io .FileFilter ;
48
48
import java .io .IOException ;
49
49
import java .net .MalformedURLException ;
50
50
import java .net .URL ;
55
55
import java .util .Collection ;
56
56
import java .util .Collections ;
57
57
import java .util .HashMap ;
58
- import java .util .HashSet ;
59
58
import java .util .LinkedHashMap ;
60
59
import java .util .List ;
61
60
import java .util .Map ;
62
61
import java .util .Optional ;
63
62
import java .util .ServiceLoader ;
64
- import java .util .Set ;
65
63
import java .util .concurrent .ConcurrentHashMap ;
66
64
import java .util .function .BiConsumer ;
67
65
import java .util .stream .Collectors ;
@@ -90,6 +88,9 @@ public abstract class AbstractPluginDiscovery<T> implements PluginDiscovery<T> {
90
88
private final BiConsumer <ClassLoader , URL > addURLToClassLoaderConsumer ;
91
89
protected final ConcurrentHashMap <PluginIdentifier , Optional <URL >> pluginJarPath =
92
90
new ConcurrentHashMap <>(Common .COLLECTION_SIZE );
91
+ protected final Map <PluginIdentifier , String > sourcePluginInstance ;
92
+ protected final Map <PluginIdentifier , String > sinkPluginInstance ;
93
+ protected final Map <PluginIdentifier , String > transformPluginInstance ;
93
94
94
95
public AbstractPluginDiscovery (BiConsumer <ClassLoader , URL > addURLToClassloader ) {
95
96
this (Common .connectorDir (), loadConnectorPluginConfig (), addURLToClassloader );
@@ -114,6 +115,9 @@ public AbstractPluginDiscovery(
114
115
this .pluginDir = pluginDir ;
115
116
this .pluginMappingConfig = pluginMappingConfig ;
116
117
this .addURLToClassLoaderConsumer = addURLToClassLoaderConsumer ;
118
+ this .sourcePluginInstance = getAllSupportedPlugins (PluginType .SOURCE );
119
+ this .sinkPluginInstance = getAllSupportedPlugins (PluginType .SINK );
120
+ this .transformPluginInstance = getAllSupportedPlugins (PluginType .TRANSFORM );
117
121
log .info ("Load {} Plugin from {}" , getPluginBaseClass ().getSimpleName (), pluginDir );
118
122
}
119
123
@@ -423,14 +427,10 @@ private Optional<URL> findPluginJarPath(PluginIdentifier pluginIdentifier) {
423
427
pluginDir
424
428
.toFile ()
425
429
.listFiles (
426
- new FileFilter () {
427
- @ Override
428
- public boolean accept (File pathname ) {
429
- return pathname .getName ().endsWith (".jar" )
430
+ pathname ->
431
+ pathname .getName ().endsWith (".jar" )
430
432
&& StringUtils .startsWithIgnoreCase (
431
- pathname .getName (), pluginJarPrefix );
432
- }
433
- });
433
+ pathname .getName (), pluginJarPrefix ));
434
434
if (ArrayUtils .isEmpty (targetPluginFiles )) {
435
435
return Optional .empty ();
436
436
}
@@ -439,10 +439,9 @@ public boolean accept(File pathname) {
439
439
if (targetPluginFiles .length == 1 ) {
440
440
pluginJarPath = targetPluginFiles [0 ].toURI ().toURL ();
441
441
} else {
442
+ PluginType type = PluginType .valueOf (pluginType .toUpperCase ());
442
443
pluginJarPath =
443
- findMostSimlarPluginJarFile (targetPluginFiles , pluginJarPrefix )
444
- .toURI ()
445
- .toURL ();
444
+ selectPluginJar (targetPluginFiles , pluginJarPrefix , pluginName , type ).get ();
446
445
}
447
446
log .info ("Discovery plugin jar for: {} at: {}" , pluginIdentifier , pluginJarPath );
448
447
return Optional .of (pluginJarPath );
@@ -455,104 +454,59 @@ public boolean accept(File pathname) {
455
454
}
456
455
}
457
456
458
- private static File findMostSimlarPluginJarFile (
459
- File [] targetPluginFiles , String pluginJarPrefix ) {
460
- String splitRegex = " \\ -| \\ _| \\ ." ;
461
- double maxSimlarity = - Integer . MAX_VALUE ;
462
- int mostSimlarPluginJarFileIndex = - 1 ;
463
- for ( int i = 0 ; i < targetPluginFiles . length ; i ++) {
464
- File file = targetPluginFiles [ i ];
465
- String fileName = file . getName ();
466
- double similarity =
467
- CosineSimilarityUtil . cosineSimilarity ( pluginJarPrefix , fileName , splitRegex );
468
- if ( similarity > maxSimlarity ) {
469
- maxSimlarity = similarity ;
470
- mostSimlarPluginJarFileIndex = i ;
471
- }
457
+ private Optional < URL > selectPluginJar (
458
+ File [] targetPluginFiles , String pluginJarPrefix , String pluginName , PluginType type ) {
459
+ List < URL > resMatchedUrls = new ArrayList <>() ;
460
+ for ( File file : targetPluginFiles ) {
461
+ Optional < URL > matchedUrl = findMatchingUrl ( file , type ) ;
462
+ matchedUrl . ifPresent ( resMatchedUrls :: add );
463
+ }
464
+ if ( resMatchedUrls . size () != 1 ) {
465
+ throw new SeaTunnelException (
466
+ String . format (
467
+ "Cannot find unique plugin jar for pluginIdentifier: %s -> %s. Possible impact jar: %s" ,
468
+ pluginName , pluginJarPrefix , Arrays . asList ( targetPluginFiles ))) ;
469
+ } else {
470
+ return Optional . of ( resMatchedUrls . get ( 0 ));
472
471
}
473
- return targetPluginFiles [mostSimlarPluginJarFileIndex ];
474
472
}
475
473
476
- static class CosineSimilarityUtil {
477
- public static double cosineSimilarity (String textA , String textB , String splitRegrex ) {
478
- Set <String > words1 =
479
- new HashSet <>(Arrays .asList (textA .toLowerCase ().split (splitRegrex )));
480
- Set <String > words2 =
481
- new HashSet <>(Arrays .asList (textB .toLowerCase ().split (splitRegrex )));
482
- int [] termFrequency1 = calculateTermFrequencyVector (textA , words1 , splitRegrex );
483
- int [] termFrequency2 = calculateTermFrequencyVector (textB , words2 , splitRegrex );
484
- return calculateCosineSimilarity (termFrequency1 , termFrequency2 );
474
+ private Optional <URL > findMatchingUrl (File file , PluginType type ) {
475
+ Map <PluginIdentifier , String > pluginInstanceMap = null ;
476
+ switch (type ) {
477
+ case SINK :
478
+ pluginInstanceMap = sinkPluginInstance ;
479
+ break ;
480
+ case SOURCE :
481
+ pluginInstanceMap = sourcePluginInstance ;
482
+ break ;
483
+ case TRANSFORM :
484
+ pluginInstanceMap = transformPluginInstance ;
485
+ break ;
485
486
}
486
-
487
- private static int [] calculateTermFrequencyVector (
488
- String text , Set <String > words , String splitRegrex ) {
489
- int [] termFrequencyVector = new int [words .size ()];
490
- String [] textArray = text .toLowerCase ().split (splitRegrex );
491
- List <String > orderedWords = new ArrayList <String >();
492
- words .clear ();
493
- for (String word : textArray ) {
494
- if (!words .contains (word )) {
495
- orderedWords .add (word );
496
- words .add (word );
497
- }
498
- }
499
- for (String word : textArray ) {
500
- if (words .contains (word )) {
501
- int index = 0 ;
502
- for (String w : orderedWords ) {
503
- if (w .equals (word )) {
504
- termFrequencyVector [index ]++;
505
- break ;
506
- }
507
- index ++;
508
- }
509
- }
510
- }
511
- return termFrequencyVector ;
487
+ if (pluginInstanceMap == null ) {
488
+ return Optional .empty ();
512
489
}
513
-
514
- private static double calculateCosineSimilarity (int [] vectorA , int [] vectorB ) {
515
- double dotProduct = 0.0 ;
516
- double magnitudeA = 0.0 ;
517
- double magnitudeB = 0.0 ;
518
- int vectorALength = vectorA .length ;
519
- int vectorBLength = vectorB .length ;
520
- if (vectorALength < vectorBLength ) {
521
- int [] vectorTemp = new int [vectorBLength ];
522
- for (int i = 0 ; i < vectorB .length ; i ++) {
523
- if (i <= vectorALength - 1 ) {
524
- vectorTemp [i ] = vectorA [i ];
525
- } else {
526
- vectorTemp [i ] = 0 ;
527
- }
528
- }
529
- vectorA = vectorTemp ;
530
- }
531
- if (vectorALength > vectorBLength ) {
532
- int [] vectorTemp = new int [vectorALength ];
533
- for (int i = 0 ; i < vectorA .length ; i ++) {
534
- if (i <= vectorBLength - 1 ) {
535
- vectorTemp [i ] = vectorB [i ];
536
- } else {
537
- vectorTemp [i ] = 0 ;
538
- }
539
- }
540
- vectorB = vectorTemp ;
541
- }
542
- for (int i = 0 ; i < vectorA .length ; i ++) {
543
- dotProduct += vectorA [i ] * vectorB [i ];
544
- magnitudeA += Math .pow (vectorA [i ], 2 );
545
- magnitudeB += Math .pow (vectorB [i ], 2 );
490
+ List <PluginIdentifier > matchedIdentifier = new ArrayList <>();
491
+ for (Map .Entry <PluginIdentifier , String > entry : pluginInstanceMap .entrySet ()) {
492
+ if (file .getName ().startsWith (entry .getValue ())) {
493
+ matchedIdentifier .add (entry .getKey ());
546
494
}
495
+ }
547
496
548
- magnitudeA = Math .sqrt (magnitudeA );
549
- magnitudeB = Math .sqrt (magnitudeB );
550
-
551
- if (magnitudeA == 0 || magnitudeB == 0 ) {
552
- return 0.0 ; // Avoid dividing by 0
553
- } else {
554
- return dotProduct / (magnitudeA * magnitudeB );
497
+ if (matchedIdentifier .size () == 1 ) {
498
+ try {
499
+ return Optional .of (file .toURI ().toURL ());
500
+ } catch (MalformedURLException e ) {
501
+ log .warn ("Cannot get plugin URL for pluginIdentifier: {}" , file , e );
555
502
}
556
503
}
504
+ if (log .isDebugEnabled ()) {
505
+ log .debug (
506
+ "File found: {}, matches more than one PluginIdentifier: {}" ,
507
+ file .getName (),
508
+ matchedIdentifier );
509
+ }
510
+ return Optional .empty ();
557
511
}
558
512
}
0 commit comments