Conditionally htmlEscape chars based on encoding

This commit adds new htmlEscape methods that take the character encoding
as a parameter. According to specs and recommendations, the set of
characters to be HTML-escaped depends on the encoding used in the response.
If the current character encoding supports a character natively, we
shouldn't escape it; reserved characters (<, >, ', ", &) must, of course,
always be escaped.

See: http://www.w3.org/TR/html4/sgml/entities.html#h-24.3
See: spring-projects/spring-framework#385 by @candrews

Issue: SPR-9293
This commit is contained in:
Brian Clozel
2014-10-17 16:19:55 +02:00
parent 4d3ade563a
commit 369cabf064
4 changed files with 133 additions and 7 deletions

View File

@@ -76,6 +76,20 @@ public class HtmlCharacterEntityReferencesTests {
(char) -1, entityReferences.convertToCharacter("invalid"));
}
// SPR-9293
/**
 * Checks {@code convertToReference(char, String)} when the encoding is
 * {@code "UTF-8"}: the five reserved characters are expected to map to
 * their references, while the two non-ASCII sample characters are
 * expected to yield {@code null}.
 */
@Test
public void testConvertToReferenceUTF8() {
    final String encoding = "UTF-8";
    final HtmlCharacterEntityReferences references = new HtmlCharacterEntityReferences();

    // Reserved characters: a reference is expected for each of them.
    assertEquals("&lt;", references.convertToReference('<', encoding));
    assertEquals("&gt;", references.convertToReference('>', encoding));
    assertEquals("&amp;", references.convertToReference('&', encoding));
    assertEquals("&quot;", references.convertToReference('"', encoding));
    assertEquals("&#39;", references.convertToReference('\'', encoding));

    // Code points 233 (U+00E9) and 934 (U+03A6): no reference is expected.
    assertNull(references.convertToReference('\u00E9', encoding));
    assertNull(references.convertToReference('\u03A6', encoding));
}
private Map<Integer, String> getReferenceCharacterMap() {
CharacterEntityResourceIterator entityIterator = new CharacterEntityResourceIterator();
Map<Integer, String> referencedCharactersMap = new HashMap<Integer, String>();

View File

@@ -71,6 +71,28 @@ public class HtmlUtilsTests {
"&#977;", HtmlUtils.htmlEscapeDecimal("" + (char) 977));
}
// SPR-9293
/**
 * Exercises the encoding-aware {@code HtmlUtils.htmlEscape(String, String)}
 * and {@code HtmlUtils.htmlEscapeDecimal(String, String)} overloads with
 * {@code "UTF-8"} as the encoding.
 * <p>Covers {@code null} and empty input, plain text without special
 * characters, the reserved characters {@code <}, {@code >} and {@code "},
 * and Greek text that is expected to pass through unescaped.
 */
@Test
public void testEncodeIntoHtmlCharacterSetFromUtf8() {
    String utf8 = "UTF-8";
    assertNull("A null string should be converted to a null string",
            HtmlUtils.htmlEscape(null, utf8));
    assertEquals("An empty string should be converted to an empty string",
            "", HtmlUtils.htmlEscape("", utf8));
    // Pass the encoding here as well, so that this assertion exercises the
    // encoding-aware overload rather than the single-argument variant.
    assertEquals("A string containing no special characters should not be affected",
            "A sentence containing no special characters.",
            HtmlUtils.htmlEscape("A sentence containing no special characters.", utf8));
    assertEquals("'< >' should be encoded to '&lt; &gt;'",
            "&lt; &gt;", HtmlUtils.htmlEscape("< >", utf8));
    assertEquals("'< >' should be encoded to '&#60; &#62;'",
            "&#60; &#62;", HtmlUtils.htmlEscapeDecimal("< >", utf8));
    // Only the double quotes are expected to be escaped; the Greek letters
    // should be left untouched.
    assertEquals("UTF-8 supported chars should not be escaped",
            "Μερικοί Ελληνικοί &quot;χαρακτήρες&quot;",
            HtmlUtils.htmlEscape("Μερικοί Ελληνικοί \"χαρακτήρες\"", utf8));
}
@Test
public void testDecodeFromHtmlCharacterSet() {
assertNull("A null string should be converted to a null string",