PathPatternParser encodes patterns as it parses them

Before this commit there was no special handling for URL encoding
of the path pattern string coming into the path pattern parser. No
assumptions were made about it being in an encoded form or not.

With this change it is assumed incoming path patterns are not
encoded and as part of parsing the parser builds PathPattern
objects that include encoded elements. For example parsing "/f o"
will create a path pattern of the form "/f%20o". In this form
it can then be used to match against encoded paths.

Handling encoded characters is not trivial and has resulted in
some loss in matching speed but care has been taken to
avoid unnecessary creation of additional heap objects.  When
matching variables the variable values are return in a
decoded form. It is hoped the speed can be recovered, at least
for the common case of non-encoded incoming paths.

Issue: SPR-15640
This commit is contained in:
Andy Clement
2017-06-09 11:44:32 -07:00
parent c0550f7eb6
commit ff2af660cf
9 changed files with 332 additions and 44 deletions

View File

@@ -64,8 +64,6 @@ public class PathPatternMatcherTests {
checkNoMatch("foo", "foobar");
checkMatches("/foo/bar", "/foo/bar");
checkNoMatch("/foo/bar", "/foo/baz");
// TODO Need more tests for escaped separators in path patterns and paths?
checkMatches("/foo\\/bar", "/foo\\/bar"); // chain string is Separator(/) Literal(foo\) Separator(/) Literal(bar)
}
@Test
@@ -254,6 +252,108 @@ public class PathPatternMatcherTests {
assertEquals("a/",parse("/").getPathRemaining("/a/").getPathRemaining());
assertEquals("/bar",parse("/a{abc}").getPathRemaining("/a/bar").getPathRemaining());
}
@Test
public void encodingAndBoundVariablesCapturePathElement() {
checkCapture("{var}","f%20o","var","f o");
checkCapture("{var1}/{var2}","f%20o/f%7Co","var1","f o","var2","f|o");
checkCapture("{var1}/{var2}","f%20o/f%7co","var1","f o","var2","f|o"); // lower case encoding
// constraints
// - constraint is expressed in non encoded form
// - returned values are decoded
checkCapture("{var:foo}","foo","var","foo");
checkCapture("{var:f o}","f%20o","var","f o"); // constraint is expressed in non encoded form
checkCapture("{var:f.o}","f%20o","var","f o");
checkCapture("{var:f\\|o}","f%7co","var","f|o");
}
@Test
public void encodingAndBoundVariablesCaptureTheRestPathElement() {
checkCapture("/{*var}","/f%20o","var","/f o");
checkCapture("{var1}/{*var2}","f%20o/f%7Co","var1","f o","var2","/f|o");
// constraints - decoding happens for constraint checking but returned value is undecoded
checkCapture("/{*var}","/foo","var","/foo");
checkCapture("/{*var}","/f%20o","var","/f o"); // constraint is expressed in non encoded form
checkCapture("/{*var}","/f%20o","var","/f o");
checkCapture("/{*var}","/f%7co","var","/f|o");
}
@Test
public void encodingWithCaseSensitivity() {
// Concern here is that regardless of case sensitivity, %7c == %7C (for example)
// Need to test all path elements that might have literal components
PathPatternParser ppp = new PathPatternParser();
ppp.setCaseSensitive(true);
// LiteralPathElement
PathPattern pp = ppp.parse("/this is a |");
assertTrue(pp.matches("/this%20is%20a%20%7C"));
assertTrue(pp.matches("/this%20is%20a%20%7c"));
assertFalse(pp.matches("/thIs%20is%20a%20%7c"));
assertFalse(pp.matches("/thIs%20is%20a%20%7C"));
assertEquals("Separator(/) Literal(this%20is%20a%20%7C)",pp.toChainString());
// RegexPathElement
pp = ppp.parse("/{foo}this is a |");
assertTrue(pp.matches("/xxxthis%20is%20a%20%7C"));
assertTrue(pp.matches("/xxxthis%20is%20a%20%7c"));
assertFalse(pp.matches("/xxxXhis%20is%20a%20%7C"));
assertFalse(pp.matches("/xxxXhis%20is%20a%20%7c"));
assertEquals("Separator(/) Regex({foo}this%20is%20a%20%7C)",pp.toChainString());
// SingleCharWildcardedPathElement
pp = ppp.parse("/th?s is a |");
assertTrue(pp.matches("/this%20is%20a%20%7C"));
assertTrue(pp.matches("/this%20is%20a%20%7c"));
assertFalse(pp.matches("/xhis%20is%20a%20%7C"));
assertFalse(pp.matches("/xhis%20is%20a%20%7c"));
assertEquals("Separator(/) SingleCharWildcarded(th?s%20is%20a%20%7C)",pp.toChainString());
ppp = new PathPatternParser();
ppp.setCaseSensitive(false);
// LiteralPathElement
pp = ppp.parse("/this is a |");
assertTrue(pp.matches("/this%20is%20a%20%7C"));
assertTrue(pp.matches("/this%20is%20a%20%7c"));
assertTrue(pp.matches("/thIs%20is%20a%20%7C"));
assertTrue(pp.matches("/tHis%20is%20a%20%7c"));
// For case insensitive matches we make all the chars lower case
assertEquals("Separator(/) Literal(this%20is%20a%20%7c)",pp.toChainString());
// RegexPathElement
pp = ppp.parse("/{foo}this is a |");
assertTrue(pp.matches("/xxxthis%20is%20a%20%7C"));
assertTrue(pp.matches("/xxxthis%20is%20a%20%7c"));
assertTrue(pp.matches("/xxxThis%20is%20a%20%7C"));
assertTrue(pp.matches("/xxxThis%20is%20a%20%7c"));
assertEquals("Separator(/) Regex({foo}this%20is%20a%20%7C)",pp.toChainString());
// SingleCharWildcardedPathElement
pp = ppp.parse("/th?s is a |");
assertTrue(pp.matches("/this%20is%20a%20%7C"));
assertTrue(pp.matches("/this%20is%20a%20%7c"));
assertTrue(pp.matches("/This%20is%20a%20%7C"));
assertTrue(pp.matches("/This%20is%20a%20%7c"));
assertEquals("Separator(/) SingleCharWildcarded(th?s%20is%20a%20%7c)",pp.toChainString());
}
@Test
public void encodingAndBoundVariablesRegexPathElement() {
checkCapture("/{var1:f o}_ _{var2}","/f%20o_%20_f%7co","var1","f o","var2","f|o");
checkCapture("/{var1}_{var2}","/f%20o_foo","var1","f o","var2","foo");
checkCapture("/{var1}_ _{var2}","/f%20o_%20_f%7co","var1","f o","var2","f|o");
checkCapture("/{var1}_ _{var2:f\\|o}","/f%20o_%20_f%7co","var1","f o","var2","f|o");
checkCapture("/{var1:f o}_ _{var2:f\\|o}","/f%20o_%20_f%7co","var1","f o","var2","f|o");
}
@Test
public void encodedPaths() {
checkMatches("/foo bar", "/foo%20bar");
checkMatches("/foo*bar", "/fooboobar");
checkMatches("/f?o","/f%7co");
}
@Test
public void pathRemainingCornerCases_spr15336() {
@@ -303,6 +403,7 @@ public class PathPatternMatcherTests {
checkNoMatch("tes?", "tsst");
checkMatches(".?.a", ".a.a");
checkNoMatch(".?.a", ".aba");
checkMatches("/f?o/bar","/f%20o/bar");
}
@Test

View File

@@ -22,20 +22,7 @@ import java.util.List;
import java.util.Map;
import org.junit.Test;
import org.springframework.web.util.pattern.CaptureTheRestPathElement;
import org.springframework.web.util.pattern.CaptureVariablePathElement;
import org.springframework.web.util.pattern.LiteralPathElement;
import org.springframework.web.util.pattern.PathElement;
import org.springframework.web.util.pattern.PathPattern;
import org.springframework.web.util.pattern.PathPatternParser;
import org.springframework.web.util.pattern.PatternParseException;
import org.springframework.web.util.pattern.PatternParseException.PatternMessage;
import org.springframework.web.util.pattern.RegexPathElement;
import org.springframework.web.util.pattern.SeparatorPathElement;
import org.springframework.web.util.pattern.SingleCharWildcardedPathElement;
import org.springframework.web.util.pattern.WildcardPathElement;
import org.springframework.web.util.pattern.WildcardTheRestPathElement;
import static org.junit.Assert.*;
@@ -82,7 +69,7 @@ public class PathPatternParserTests {
assertEquals("Literal(abc)", checkStructure("abc").toChainString());
assertEquals("Regex({a}_*_{b})", checkStructure("{a}_*_{b}").toChainString());
assertEquals("Separator(/)", checkStructure("/").toChainString());
assertEquals("SingleCharWildcarding(?a?b?c)", checkStructure("?a?b?c").toChainString());
assertEquals("SingleCharWildcarded(?a?b?c)", checkStructure("?a?b?c").toChainString());
assertEquals("Wildcard(*)", checkStructure("*").toChainString());
assertEquals("WildcardTheRest(/**)", checkStructure("/**").toChainString());
}
@@ -100,6 +87,7 @@ public class PathPatternParserTests {
checkError("/{*foobar}abc", 10, PatternMessage.NO_MORE_DATA_EXPECTED_AFTER_CAPTURE_THE_REST);
checkError("/{f*oobar}", 3, PatternMessage.ILLEGAL_CHARACTER_IN_CAPTURE_DESCRIPTOR);
checkError("/{*foobar}/abc", 10, PatternMessage.NO_MORE_DATA_EXPECTED_AFTER_CAPTURE_THE_REST);
checkError("/{*foobar:.*}/abc", 9, PatternMessage.ILLEGAL_CHARACTER_IN_CAPTURE_DESCRIPTOR);
checkError("/{abc}{*foobar}", 1, PatternMessage.CAPTURE_ALL_IS_STANDALONE_CONSTRUCT);
checkError("/{abc}{*foobar}{foo}", 15, PatternMessage.NO_MORE_DATA_EXPECTED_AFTER_CAPTURE_THE_REST);
}
@@ -193,6 +181,73 @@ public class PathPatternParserTests {
checkStructure("/{f}/");
checkStructure("/{foo}/{bar}/{wibble}");
}
/**
* During a parse some elements of the path are encoded for use when matching an encoded path.
* The patterns a developer writes are not encoded, hence we decode them when turning them
* into PathPattern objects. The encoding is visible through the toChainString() method.
*/
@Test
public void encodingDuringParse() throws Exception {
PathPattern pp;
// CaptureTheRest
pp = parse("/{*var}");
assertEquals("CaptureTheRest(/{*var})",pp.toChainString());
// CaptureVariable
pp = parse("/{var}");
assertEquals("Separator(/) CaptureVariable({var})",pp.toChainString());
// Literal
pp = parse("/foo bar/b_oo");
assertEquals("Separator(/) Literal(foo%20bar) Separator(/) Literal(b_oo)",pp.toChainString());
pp = parse("foo:bar");
assertEquals("Literal(foo:bar)",pp.toChainString());
// Regex
pp = parse("{foo}_{bar}");
assertEquals("Regex({foo}_{bar})",pp.toChainString());
pp = parse("{foo}_ _{bar}");
assertEquals("Regex({foo}_%20_{bar})",pp.toChainString());
// Separator
pp = parse("/");
assertEquals("Separator(/)",pp.toChainString());
// SingleCharWildcarded
pp = parse("/foo?bar");
assertEquals("Separator(/) SingleCharWildcarded(foo?bar)",pp.toChainString());
pp = parse("/f o?bar");
assertEquals("Separator(/) SingleCharWildcarded(f%20o?bar)",pp.toChainString());
// Wildcard
pp = parse("/foo*bar");
assertEquals("Separator(/) Regex(foo*bar)",pp.toChainString());
pp = parse("f oo:*bar");
assertEquals("Regex(f%20oo:*bar)",pp.toChainString());
pp = parse("/f oo:*bar");
assertEquals("Separator(/) Regex(f%20oo:*bar)",pp.toChainString());
pp = parse("/f|!oo:*bar");
assertEquals("Separator(/) Regex(f%7C!oo:*bar)",pp.toChainString());
// WildcardTheRest
pp = parse("/**");
assertEquals("WildcardTheRest(/**)",pp.toChainString());
}
@Test
public void encodingWithConstraints() {
// Constraint regex expressions are not URL encoded
PathPattern pp = parse("/{var:f o}");
assertEquals("Separator(/) CaptureVariable({var:f o})",pp.toChainString());
pp = parse("/{var:f o}_");
assertEquals("Separator(/) Regex({var:f o}_)",pp.toChainString());
pp = parse("{foo:f o}_ _{bar:b\\|o}");
assertEquals("Regex({foo:f o}_%20_{bar:b\\|o})",pp.toChainString());
}
@Test
public void completeCaptureWithConstraints() {