2222import java .util .Set ;
2323import java .util .regex .Matcher ;
2424import java .util .regex .Pattern ;
25+ import org .apache .commons .lang .StringUtils ;
2526import pl .edu .icm .cermine .metadata .model .DocumentAuthor ;
2627import pl .edu .icm .cermine .metadata .model .DocumentMetadata ;
2728import pl .edu .icm .cermine .structure .model .BxZone ;
@@ -55,7 +56,9 @@ protected boolean enhanceMetadata(BxZone zone, DocumentMetadata metadata) {
5556 String domain = matcher .group (2 );
5657 String [] names = emails .split ("[\\ |, ]+" );
5758 for (String name : names ) {
58- addEmail (metadata , name +"@" +domain );
59+ if (!name .isEmpty ()) {
60+ addEmail (metadata , name +"@" +domain );
61+ }
5962 }
6063 }
6164 matcher = PATTERN .matcher (zone .toText ());
@@ -74,17 +77,27 @@ private void addEmail(DocumentMetadata metadata, String email) {
7477
7578 for (DocumentAuthor a : metadata .getAuthors ()) {
7679 String [] names = a .getName ().split (" " );
77- for (String namePart : names ) {
78- if (namePart .length () > 2 && email .toLowerCase ().contains (namePart .toLowerCase ())) {
79- if (author == null ) {
80- author = a ;
81- break ;
82- } else {
83- one = false ;
80+ String fname = StringUtils .join (names , "" );
81+ if (fname .toLowerCase ().contains (email .toLowerCase ().replaceFirst ("@.*" , "" ))) {
82+ if (author == null ) {
83+ author = a ;
84+ break ;
85+ } else {
86+ one = false ;
87+ }
88+ } else {
89+ for (String namePart : names ) {
90+ if (namePart .length () > 2 && email .toLowerCase ().contains (namePart .toLowerCase ())) {
91+ if (author == null ) {
92+ author = a ;
93+ break ;
94+ } else {
95+ one = false ;
96+ }
8497 }
98+ }
8599 }
86100 }
87- }
88101
89102 if (author != null && one ) {
90103 author .addEmail (email );
0 commit comments