diff --git a/url-detector/src/main/java/com/linkedin/urls/detection/UrlDetector.java b/url-detector/src/main/java/com/linkedin/urls/detection/UrlDetector.java index 43815f6..1b6bc10 100644 --- a/url-detector/src/main/java/com/linkedin/urls/detection/UrlDetector.java +++ b/url-detector/src/main/java/com/linkedin/urls/detection/UrlDetector.java @@ -154,7 +154,6 @@ private void readDefault() { while (!_reader.eof()) { //read the next char to process. char curr = _reader.read(); - switch (curr) { case ' ': //space was found, check if it's a valid single level domain. @@ -277,10 +276,16 @@ private void readDefault() { private int processColon(int length) { if (_hasScheme) { //read it as username/password if it has scheme - if (!readUserPass(length) && _buffer.length() > 0) { + if (!readUserPass(length)) { //unread the ":" so that the domain reader can process it _reader.goBack(); - _buffer.delete(_buffer.length() - 1, _buffer.length()); + + // Check buffer length before clearing it; set length to 0 if buffer is empty + if (_buffer.length() > 0) { + _buffer.delete(_buffer.length() - 1, _buffer.length()); + } else { + length = 0; + } int backtrackOnFail = _reader.getPosition() - _buffer.length() + length; if (!readDomainName(_buffer.substring(length))) { @@ -289,6 +294,8 @@ private int processColon(int length) { readEnd(ReadEndState.InvalidUrl); } length = 0; + } else { + length = 0; } } else if (readScheme() && _buffer.length() > 0) { _hasScheme = true; @@ -470,10 +477,9 @@ private boolean readScheme() { * @return True if a valid username and password was found. */ private boolean readUserPass(int beginningOfUsername) { - //The start of where we are. int start = _buffer.length(); - + //keep looping until "done" boolean done = false;