From 369cabf064e70d1e43604967cc9161f6abbf2f36 Mon Sep 17 00:00:00 2001 From: Brian Clozel Date: Fri, 17 Oct 2014 16:19:55 +0200 Subject: [PATCH] Conditionally htmlEscape chars based on encoding This commit adds new htmlEscape methods that take the character encoding as a parameter. According to specs and recommendations, the list of chars to be html escaped depends on the encoding used in the response. If the current char encoding supports chars natively, we shouldn't escape those; of course, reserved chars (<,>,',",&) should always be escaped. See: http://www.w3.org/TR/html4/sgml/entities.html#h-24.3 See: spring-projects/spring-framework#385 by @candrews Issue: SPR-9293 --- .../util/HtmlCharacterEntityReferences.java | 34 ++++++++- .../springframework/web/util/HtmlUtils.java | 70 +++++++++++++++++-- .../HtmlCharacterEntityReferencesTests.java | 14 ++++ .../web/util/HtmlUtilsTests.java | 22 ++++++ 4 files changed, 133 insertions(+), 7 deletions(-) diff --git a/spring-web/src/main/java/org/springframework/web/util/HtmlCharacterEntityReferences.java b/spring-web/src/main/java/org/springframework/web/util/HtmlCharacterEntityReferences.java index b23903b15f..b8c3350b9a 100644 --- a/spring-web/src/main/java/org/springframework/web/util/HtmlCharacterEntityReferences.java +++ b/spring-web/src/main/java/org/springframework/web/util/HtmlCharacterEntityReferences.java @@ -1,5 +1,5 @@ /* - * Copyright 2002-2012 the original author or authors. + * Copyright 2002-2014 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -107,14 +107,42 @@ class HtmlCharacterEntityReferences { * Return true if the given character is mapped to a supported entity reference. 
*/ public boolean isMappedToReference(char character) { - return (convertToReference(character) != null); + return isMappedToReference(character, WebUtils.DEFAULT_CHARACTER_ENCODING); + } + + /** + * Return true if the given character is mapped to a supported entity reference. + */ + public boolean isMappedToReference(char character, String encoding) { + return (convertToReference(character, encoding) != null); } /** * Return the reference mapped to the given character or {@code null}. */ public String convertToReference(char character) { - if (character < 1000 || (character >= 8000 && character < 10000)) { + return convertToReference(character, WebUtils.DEFAULT_CHARACTER_ENCODING); + } + + /** + * Return the reference mapped to the given character or {@code null}. + */ + public String convertToReference(char character, String encoding) { + if(encoding.startsWith("UTF-")){ + switch(character){ + case '<': + return "&lt;"; + case '>': + return "&gt;"; + case '"': + return "&quot;"; + case '&': + return "&amp;"; + case '\'': + return "&#39;"; + } + } + else if (character < 1000 || (character >= 8000 && character < 10000)) { int index = (character < 1000 ? character : character - 7000); String entityReference = this.characterToEntityReferenceMap[index]; if (entityReference != null) { diff --git a/spring-web/src/main/java/org/springframework/web/util/HtmlUtils.java b/spring-web/src/main/java/org/springframework/web/util/HtmlUtils.java index 2c93058c5b..307de87bc1 100644 --- a/spring-web/src/main/java/org/springframework/web/util/HtmlUtils.java +++ b/spring-web/src/main/java/org/springframework/web/util/HtmlUtils.java @@ -1,5 +1,5 @@ /* - * Copyright 2002-2012 the original author or authors. + * Copyright 2002-2014 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,6 +16,8 @@ package org.springframework.web.util; +import org.springframework.util.Assert; + /** * Utility class for HTML escaping. Escapes and unescapes * based on the W3C HTML 4.01 recommendation, handling @@ -57,13 +59,33 @@ public abstract class HtmlUtils { * @return the escaped string */ public static String htmlEscape(String input) { + return htmlEscape(input, WebUtils.DEFAULT_CHARACTER_ENCODING); + } + + /** + * Turn special characters into HTML character references. + * Handles complete character set defined in HTML 4.01 recommendation. + *

<p>Escapes all special characters to their corresponding + * entity reference (e.g. {@code &lt;}) at least as required by the + * specified encoding. In other words, if a special character does + * not have to be escaped for the given encoding, it may not be. + *

Reference: + * + * http://www.w3.org/TR/html4/sgml/entities.html + * + * @param input the (unescaped) input string + * @param encoding The name of a supported {@link java.nio.charset.Charset charset} + * @return the escaped string + */ + public static String htmlEscape(String input, String encoding) { + Assert.notNull(encoding, "encoding is required"); if (input == null) { return null; } StringBuilder escaped = new StringBuilder(input.length() * 2); for (int i = 0; i < input.length(); i++) { char character = input.charAt(i); - String reference = characterEntityReferences.convertToReference(character); + String reference = characterEntityReferences.convertToReference(character, encoding); if (reference != null) { escaped.append(reference); } @@ -87,13 +109,33 @@ public abstract class HtmlUtils { * @return the escaped string */ public static String htmlEscapeDecimal(String input) { + return htmlEscapeDecimal(input, WebUtils.DEFAULT_CHARACTER_ENCODING); + } + + /** + * Turn special characters into HTML character references. + * Handles complete character set defined in HTML 4.01 recommendation. + *

<p>Escapes all special characters to their corresponding numeric + * reference in decimal format (&#Decimal;) at least as required by the + * specified encoding. In other words, if a special character does + * not have to be escaped for the given encoding, it may not be. + *

Reference: + * + * http://www.w3.org/TR/html4/sgml/entities.html + * + * @param input the (unescaped) input string + * @param encoding The name of a supported {@link java.nio.charset.Charset charset} + * @return the escaped string + */ + public static String htmlEscapeDecimal(String input, String encoding) { + Assert.notNull(encoding, "encoding is required"); if (input == null) { return null; } StringBuilder escaped = new StringBuilder(input.length() * 2); for (int i = 0; i < input.length(); i++) { char character = input.charAt(i); - if (characterEntityReferences.isMappedToReference(character)) { + if (characterEntityReferences.isMappedToReference(character, encoding)) { escaped.append(HtmlCharacterEntityReferences.DECIMAL_REFERENCE_START); escaped.append((int) character); escaped.append(HtmlCharacterEntityReferences.REFERENCE_END); @@ -118,13 +160,33 @@ public abstract class HtmlUtils { * @return the escaped string */ public static String htmlEscapeHex(String input) { + return htmlEscapeHex(input, WebUtils.DEFAULT_CHARACTER_ENCODING); + } + + /** + * Turn special characters into HTML character references. + * Handles complete character set defined in HTML 4.01 recommendation. + *

<p>Escapes all special characters to their corresponding numeric + * reference in hex format (&#xHex;) at least as required by the + * specified encoding. In other words, if a special character does + * not have to be escaped for the given encoding, it may not be. + *

Reference: + * + * http://www.w3.org/TR/html4/sgml/entities.html + * + * @param input the (unescaped) input string + * @param encoding The name of a supported {@link java.nio.charset.Charset charset} + * @return the escaped string + */ + public static String htmlEscapeHex(String input, String encoding) { + Assert.notNull(encoding, "encoding is required"); if (input == null) { return null; } StringBuilder escaped = new StringBuilder(input.length() * 2); for (int i = 0; i < input.length(); i++) { char character = input.charAt(i); - if (characterEntityReferences.isMappedToReference(character)) { + if (characterEntityReferences.isMappedToReference(character, encoding)) { escaped.append(HtmlCharacterEntityReferences.HEX_REFERENCE_START); escaped.append(Integer.toString(character, 16)); escaped.append(HtmlCharacterEntityReferences.REFERENCE_END); diff --git a/spring-web/src/test/java/org/springframework/web/util/HtmlCharacterEntityReferencesTests.java b/spring-web/src/test/java/org/springframework/web/util/HtmlCharacterEntityReferencesTests.java index 2cbe50e680..f0b2c1c999 100644 --- a/spring-web/src/test/java/org/springframework/web/util/HtmlCharacterEntityReferencesTests.java +++ b/spring-web/src/test/java/org/springframework/web/util/HtmlCharacterEntityReferencesTests.java @@ -76,6 +76,20 @@ public class HtmlCharacterEntityReferencesTests { (char) -1, entityReferences.convertToCharacter("invalid")); } + // SPR-9293 + @Test + public void testConvertToReferenceUTF8() { + HtmlCharacterEntityReferences entityReferences = new HtmlCharacterEntityReferences(); + String utf8 = "UTF-8"; + assertEquals("&lt;", entityReferences.convertToReference('<', utf8)); + assertEquals("&gt;", entityReferences.convertToReference('>', utf8)); + assertEquals("&amp;", entityReferences.convertToReference('&', utf8)); + assertEquals("&quot;", entityReferences.convertToReference('"', utf8)); + assertEquals("&#39;", entityReferences.convertToReference('\'', utf8)); + 
assertNull(entityReferences.convertToReference((char) 233, utf8)); + assertNull(entityReferences.convertToReference((char) 934, utf8)); + } + private Map getReferenceCharacterMap() { CharacterEntityResourceIterator entityIterator = new CharacterEntityResourceIterator(); Map referencedCharactersMap = new HashMap(); diff --git a/spring-web/src/test/java/org/springframework/web/util/HtmlUtilsTests.java b/spring-web/src/test/java/org/springframework/web/util/HtmlUtilsTests.java index 26d8f711b3..e752f64a7a 100644 --- a/spring-web/src/test/java/org/springframework/web/util/HtmlUtilsTests.java +++ b/spring-web/src/test/java/org/springframework/web/util/HtmlUtilsTests.java @@ -71,6 +71,28 @@ public class HtmlUtilsTests { "&#977;", HtmlUtils.htmlEscapeDecimal("" + (char) 977)); } + // SPR-9293 + @Test + public void testEncodeIntoHtmlCharacterSetFromUtf8() { + String utf8 = ("UTF-8"); + assertNull("A null string should be converted to a null string", + HtmlUtils.htmlEscape(null, utf8)); + assertEquals("An empty string should be converted to an empty string", + "", HtmlUtils.htmlEscape("", utf8)); + assertEquals("A string containing no special characters should not be affected", + "A sentence containing no special characters.", + HtmlUtils.htmlEscape("A sentence containing no special characters.")); + + assertEquals("'< >' should be encoded to '&lt; &gt;'", + "&lt; &gt;", HtmlUtils.htmlEscape("< >", utf8)); + assertEquals("'< >' should be encoded to '&#60; &#62;'", + "&#60; &#62;", HtmlUtils.htmlEscapeDecimal("< >", utf8)); + + assertEquals("UTF-8 supported chars should not be escaped", + "Μερικοί Ελληνικοί &quot;χαρακτήρες&quot;", + HtmlUtils.htmlEscape("Μερικοί Ελληνικοί \"χαρακτήρες\"", utf8)); + } + @Test public void testDecodeFromHtmlCharacterSet() { assertNull("A null string should be converted to a null string",