Skip to content

Commit d8a0f4b

Browse files
AlainODea authored and egulias committed
🐛 Valid domains won't validate (#11)
* 🐛 Valid domains won't validate
  - Allow valid domains with subdomains starting with numbers
  - Allow valid domains with hyphens
* 🎨 Address @egulias code review
  - Extract method for characters not allowed in subdomains
  - Separate no warning domain tests from warning tests
  - Expand domain tests to cover unicode, leading num, and hyphens
1 parent 845e0d1 commit d8a0f4b

File tree

5 files changed

+96
-3
lines changed

5 files changed

+96
-3
lines changed

src/main/java/emailvalidator4j/lexer/EmailLexer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ public class EmailLexer {
1616

1717
public void lex(String input) {
1818
Pattern pattern = Pattern.compile(
19-
"(([a-zA-Z_]|[^\\u0000-\\u007F])+[46]?)|([0-9]+)|(\r\n)|(::)|(\\s+?)|(.)|(\\p{Cc}+)", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE
19+
"(([a-zA-Z0-9!#$%&'*+\\-/=?^_`{|}~]|[^\\u0000-\\u007F])+[46]?)|([0-9]+)|(\r\n)|(::)|(\\s+?)|(.)|(\\p{Cc}+)", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE
2020
);
2121
Matcher matcher = pattern.matcher(input);
2222

src/main/java/emailvalidator4j/parser/DomainPart.java

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
final class DomainPart extends Parser {
1414

15+
// Matches any single character that is NOT permitted inside a sub-domain label.
// Permitted: ASCII letters, ASCII digits, hyphen, and every non-ASCII code point (U+0080..U+10FFFF).
// The code points are written in the brace form \x{h...h} on purpose: in java.util.regex the short
// hex escape consumes exactly two hex digits (and the four-digit Unicode escape exactly four), so a
// longer run of digits would be read as a short escape followed by literal characters.
private static final Pattern subDomainIllegalCharacters = Pattern.compile("[^a-zA-Z0-9\\-\\x{80}-\\x{10FFFF}]");
1516
private static final int DOMAINPART_MAX_LENGTH = 255;
1617
private static final int LABEL_MAX_LENGTH = 63;
1718
private final HashSet<TokenInterface> notAllowedTokens = new HashSet<TokenInterface>(2) {{
@@ -112,13 +113,29 @@ private boolean isDomainLiteral(boolean openBrackets) throws UnclosedDomainLiter
112113
return this.lexer.getCurrent().equals(Tokens.OPENBRACKET);
113114
}
114115

115-
private void checkNotAllowedChars(TokenInterface token) throws DomainNotAllowedCharacter {
116+
private void checkNotAllowedChars(TokenInterface token) throws DomainNotAllowedCharacter, DomainHyphen {
116117
if (notAllowedTokens.contains(token)) {
117118
throw new DomainNotAllowedCharacter(
118119
String.format("%s is not allowed in domain part", token.getName())
119120
);
120121
}
122+
checkNotAllowedCharsInSubDomain(token);
123+
}
121124

125+
private void checkNotAllowedCharsInSubDomain(TokenInterface token) throws DomainHyphen, DomainNotAllowedCharacter {
126+
if (Tokens.GENERIC.equals(token.getName())) {
127+
String subDomain = token.getText();
128+
if (subDomain.charAt(0) == '-') {
129+
throw new DomainHyphen("Sub-domains cannot start with hyphen");
130+
}
131+
if (subDomain.charAt(Math.max(0, subDomain.length() - 1)) == '-') {
132+
throw new DomainHyphen("Sub-domains cannot end with hyphen");
133+
}
134+
Matcher matcher = subDomainIllegalCharacters.matcher(subDomain);
135+
if (matcher.find()) {
136+
throw new DomainNotAllowedCharacter(matcher.group());
137+
}
138+
}
122139
}
123140

124141
private void checkLabelLength() {

src/test/java/emailvalidator4j/EmailValidatorTest.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,10 @@ public static Object[][] validEmailsProvider() {
8181
{"example((example))@fakedfake.co.uk"},
8282
{"example@faked(fake).co.uk"},
8383
84+
85+
86+
87+
8488
{"инфо@письмо.рф"},
8589
{"\"username\"@example.com"},
8690
{"\"user,name\"@example.com"},

src/test/java/emailvalidator4j/lexer/EmailLexerTest.java

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,17 @@ public void toStringReturnsLeftTokens() {
167167
Assert.assertTrue("@bar.com".equals(lexer.toString()));
168168
}
169169

170+
@Test
171+
@UseDataProvider("atextExamples")
172+
public void atextParsing(String atext) {
173+
EmailLexer lexer = new EmailLexer();
174+
lexer.lex(atext);
175+
TokenInterface token = lexer.getCurrent();
176+
Assert.assertEquals(Tokens.GENERIC, token.getName());
177+
Assert.assertEquals(atext, token.getText());
178+
179+
}
180+
170181
@Test
171182
@UseDataProvider("invalidUTF8StringsProvider")
172183
public void invalidUTF8CharsAreInvalidTokens(String utf8String) throws Exception {
@@ -178,6 +189,42 @@ public void invalidUTF8CharsAreInvalidTokens(String utf8String) throws Exception
178189
);
179190
}
180191

192+
/*
193+
* atext = UTF8-non-ascii /
194+
* ALPHA / DIGIT / ; Printable US-ASCII
195+
* "!" / "#" / ; characters not including
196+
* "$" / "%" / ; specials. Used for atoms.
197+
* "&" / "'" /
198+
* "*" / "+" /
199+
* "-" / "/" /
200+
* "=" / "?" /
201+
* "^" / "_" /
202+
* "`" / "{" /
203+
* "|" / "}" /
204+
* "~"
205+
*/
206+
@DataProvider
207+
public static Object[][] atextExamples() {
208+
return new Object[][] {
209+
{"a"},
210+
{"1"},
211+
{"!"},
212+
{"\uD83D"},
213+
{"aa"},
214+
{"a1"},
215+
{"a!"},
216+
{"a\uD83D"},
217+
{"aaa"},
218+
{"a1a"},
219+
{"a!a"},
220+
{"a\uD83D!"},
221+
{"aaa\uD83D"},
222+
{"a1a\uD83D"},
223+
{"a!a\uD83D"},
224+
{"a\uD83Da\uD83D"},
225+
};
226+
}
227+
181228
@DataProvider
182229
public static Object[][] invalidUTF8StringsProvider() throws Exception {
183230
ArrayList<ArrayList<String>> invalidStrings = new ArrayList<>();

src/test/java/emailvalidator4j/parser/DomainPartTest.java

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.junit.runner.RunWith;
1313

1414
import java.util.Arrays;
15+
import java.util.Collections;
1516
import java.util.List;
1617

1718
@RunWith(DataProviderRunner.class)
@@ -54,7 +55,7 @@ public static Object[][] invalidDomainParts() {
5455
{DomainNotAllowedCharacter.class, "@example\\"},
5556
{DomainNotAllowedCharacter.class, "@exa\\mple"},
5657
{UnclosedDomainLiteral.class, "@example]"},
57-
{ConsecutiveGeneric.class, "@example'"},
58+
{DomainNotAllowedCharacter.class, "@example'"},
5859
};
5960
}
6061

@@ -116,6 +117,30 @@ public static Object[][] domainPartWithWarnings() {
116117
};
117118
}
118119

120+
@Test
121+
@UseDataProvider("domainPartWithoutWarnings")
122+
public void domainPartHasNoWarnings(String domainPart) throws InvalidEmail {
123+
DomainPart parser = this.getDomainPartParser();
124+
parser.parse(domainPart);
125+
126+
Assert.assertEquals(Collections.emptyList(), parser.getWarnings());
127+
}
128+
129+
@DataProvider
130+
public static Object[][] domainPartWithoutWarnings() {
131+
return new Object[][]{
132+
{"@example.com"},
133+
{"@subdomain.example.com"},
134+
{"@has-hyphen.example.com"},
135+
{"@1leadingnumber.example.com"},
136+
{"@письмо.рф"},
137+
{"@example.co"},
138+
{"@subdomain.example.co"},
139+
{"@has-hyphen.example.co"},
140+
{"@1leadingnumber.example.co"},
141+
};
142+
}
143+
119144
@Test
120145
public void lexedDomainIsExposed() throws InvalidEmail {
121146
DomainPart parser = this.getDomainPartParser();

0 commit comments

Comments
 (0)