From 77be8178483003a4caa6079a10f2aba2d38eef09 Mon Sep 17 00:00:00 2001 From: Ben East Date: Mon, 3 Jul 2017 15:57:17 -0500 Subject: [PATCH 1/3] [Issue #16] Updated process colon to solve StringIndexOutOfBoundsException when input string contains the sub-string "//:@." --- .../com/linkedin/urls/detection/UrlDetector.java | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/url-detector/src/main/java/com/linkedin/urls/detection/UrlDetector.java b/url-detector/src/main/java/com/linkedin/urls/detection/UrlDetector.java index 43815f6..1098ba9 100644 --- a/url-detector/src/main/java/com/linkedin/urls/detection/UrlDetector.java +++ b/url-detector/src/main/java/com/linkedin/urls/detection/UrlDetector.java @@ -154,7 +154,6 @@ private void readDefault() { while (!_reader.eof()) { //read the next char to process. char curr = _reader.read(); - switch (curr) { case ' ': //space was found, check if it's a valid single level domain. @@ -188,7 +187,8 @@ private void readDefault() { case '\uFF61': case '.': //"." was found, read the domain name using the start from length. _buffer.append(curr); - readDomainName(_buffer.substring(length)); + String substring = _buffer.substring(length); + readDomainName(substring); length = 0; break; case '@': //Check the domain name after a username @@ -277,10 +277,15 @@ private void readDefault() { private int processColon(int length) { if (_hasScheme) { //read it as username/password if it has scheme - if (!readUserPass(length) && _buffer.length() > 0) { + if (!readUserPass(length)) { //unread the ":" so that the domain reader can process it _reader.goBack(); - _buffer.delete(_buffer.length() - 1, _buffer.length()); + + if (_buffer.length() > 0) { + _buffer.delete(_buffer.length() - 1, _buffer.length()); + } else { + length = 0; + } int backtrackOnFail = _reader.getPosition() - _buffer.length() + length; if (!readDomainName(_buffer.substring(length))) { @@ -470,10 +475,9 @@ private boolean readScheme() { * @return True if a valid username and password was found. */ private boolean readUserPass(int beginningOfUsername) { - //The start of where we are. int start = _buffer.length(); - + //keep looping until "done" boolean done = false; From 9f267214885c6f82fad0915ddb42db33fbddccd2 Mon Sep 17 00:00:00 2001 From: Ben East Date: Mon, 3 Jul 2017 15:59:19 -0500 Subject: [PATCH 2/3] [Issue #16] Updated process colon to solve StringIndexOutOfBoundsException when input string contains the sub-string "//:@." --- .../src/main/java/com/linkedin/urls/detection/UrlDetector.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/url-detector/src/main/java/com/linkedin/urls/detection/UrlDetector.java b/url-detector/src/main/java/com/linkedin/urls/detection/UrlDetector.java index 1098ba9..f85873c 100644 --- a/url-detector/src/main/java/com/linkedin/urls/detection/UrlDetector.java +++ b/url-detector/src/main/java/com/linkedin/urls/detection/UrlDetector.java @@ -187,8 +187,7 @@ private void readDefault() { case '\uFF61': case '.': //"." was found, read the domain name using the start from length. _buffer.append(curr); - String substring = _buffer.substring(length); - readDomainName(substring); + readDomainName(_buffer.substring(length)); length = 0; break; case '@': //Check the domain name after a username From d405160c26b3e041fc3478ff09b6784dba3ef7bc Mon Sep 17 00:00:00 2001 From: Ben East Date: Mon, 3 Jul 2017 16:42:06 -0500 Subject: [PATCH 3/3] [Issue #12] Fixed StringIndexOutOfBoundsException when given 'http://user:pass@host.com host.com' as input string Updated url-detector.processColon to set length to 0 when readUserPass(length) is true. All unit tests passed after this update. --- .../main/java/com/linkedin/urls/detection/UrlDetector.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/url-detector/src/main/java/com/linkedin/urls/detection/UrlDetector.java b/url-detector/src/main/java/com/linkedin/urls/detection/UrlDetector.java index f85873c..1b6bc10 100644 --- a/url-detector/src/main/java/com/linkedin/urls/detection/UrlDetector.java +++ b/url-detector/src/main/java/com/linkedin/urls/detection/UrlDetector.java @@ -280,10 +280,11 @@ private int processColon(int length) { //unread the ":" so that the domain reader can process it _reader.goBack(); + // Check buffer length before clearing it; set length to 0 if buffer is empty if (_buffer.length() > 0) { - _buffer.delete(_buffer.length() - 1, _buffer.length()); + _buffer.delete(_buffer.length() - 1, _buffer.length()); } else { - length = 0; + length = 0; } int backtrackOnFail = _reader.getPosition() - _buffer.length() + length; @@ -293,6 +294,8 @@ private int processColon(int length) { readEnd(ReadEndState.InvalidUrl); } length = 0; + } else { + length = 0; } } else if (readScheme() && _buffer.length() > 0) { _hasScheme = true;