diff --git a/spring-web/src/main/java/org/springframework/web/util/HtmlCharacterEntityReferences.java b/spring-web/src/main/java/org/springframework/web/util/HtmlCharacterEntityReferences.java index b23903b15f..b8c3350b9a 100644 --- a/spring-web/src/main/java/org/springframework/web/util/HtmlCharacterEntityReferences.java +++ b/spring-web/src/main/java/org/springframework/web/util/HtmlCharacterEntityReferences.java @@ -1,5 +1,5 @@ /* - * Copyright 2002-2012 the original author or authors. + * Copyright 2002-2014 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -107,14 +107,42 @@ class HtmlCharacterEntityReferences { * Return true if the given character is mapped to a supported entity reference. */ public boolean isMappedToReference(char character) { - return (convertToReference(character) != null); + return isMappedToReference(character, WebUtils.DEFAULT_CHARACTER_ENCODING); + } + + /** + * Return true if the given character is mapped to a supported entity reference. + */ + public boolean isMappedToReference(char character, String encoding) { + return (convertToReference(character, encoding) != null); } /** * Return the reference mapped to the given character or {@code null}. */ public String convertToReference(char character) { - if (character < 1000 || (character >= 8000 && character < 10000)) { + return convertToReference(character, WebUtils.DEFAULT_CHARACTER_ENCODING); + } + + /** + * Return the reference mapped to the given character or {@code null}. + */ + public String convertToReference(char character, String encoding) { + if(encoding.startsWith("UTF-")){ + switch(character){ + case '<': + return "<"; + case '>': + return ">"; + case '"': + return """; + case '&': + return "&"; + case '\'': + return "'"; + } + } + else if (character < 1000 || (character >= 8000 && character < 10000)) { int index = (character < 1000 ? 
character : character - 7000); String entityReference = this.characterToEntityReferenceMap[index]; if (entityReference != null) { diff --git a/spring-web/src/main/java/org/springframework/web/util/HtmlUtils.java b/spring-web/src/main/java/org/springframework/web/util/HtmlUtils.java index 2c93058c5b..307de87bc1 100644 --- a/spring-web/src/main/java/org/springframework/web/util/HtmlUtils.java +++ b/spring-web/src/main/java/org/springframework/web/util/HtmlUtils.java @@ -1,5 +1,5 @@ /* - * Copyright 2002-2012 the original author or authors. + * Copyright 2002-2014 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,8 @@ package org.springframework.web.util; +import org.springframework.util.Assert; + /** * Utility class for HTML escaping. Escapes and unescapes * based on the W3C HTML 4.01 recommendation, handling @@ -57,13 +59,33 @@ public abstract class HtmlUtils { * @return the escaped string */ public static String htmlEscape(String input) { + return htmlEscape(input, WebUtils.DEFAULT_CHARACTER_ENCODING); + } + + /** + * Turn special characters into HTML character references. + * Handles complete character set defined in HTML 4.01 recommendation. + *

Escapes all special characters to their corresponding + * entity reference (e.g. {@code &lt;}) at least as required by the + * specified encoding. In other words, if a special character does + * not have to be escaped for the given encoding, it may not be. + *

Reference: + * + * http://www.w3.org/TR/html4/sgml/entities.html + * + * @param input the (unescaped) input string + * @param encoding The name of a supported {@link java.nio.charset.Charset charset} + * @return the escaped string + */ + public static String htmlEscape(String input, String encoding) { + Assert.notNull(encoding, "encoding is required"); if (input == null) { return null; } StringBuilder escaped = new StringBuilder(input.length() * 2); for (int i = 0; i < input.length(); i++) { char character = input.charAt(i); - String reference = characterEntityReferences.convertToReference(character); + String reference = characterEntityReferences.convertToReference(character, encoding); if (reference != null) { escaped.append(reference); } @@ -87,13 +109,33 @@ public abstract class HtmlUtils { * @return the escaped string */ public static String htmlEscapeDecimal(String input) { + return htmlEscapeDecimal(input, WebUtils.DEFAULT_CHARACTER_ENCODING); + } + + /** + * Turn special characters into HTML character references. + * Handles complete character set defined in HTML 4.01 recommendation. + *

Escapes all special characters to their corresponding numeric + * reference in decimal format (&#Decimal;) at least as required by the + * specified encoding. In other words, if a special character does + * not have to be escaped for the given encoding, it may not be. + *

Reference: + * + * http://www.w3.org/TR/html4/sgml/entities.html + * + * @param input the (unescaped) input string + * @param encoding The name of a supported {@link java.nio.charset.Charset charset} + * @return the escaped string + */ + public static String htmlEscapeDecimal(String input, String encoding) { + Assert.notNull(encoding, "encoding is required"); if (input == null) { return null; } StringBuilder escaped = new StringBuilder(input.length() * 2); for (int i = 0; i < input.length(); i++) { char character = input.charAt(i); - if (characterEntityReferences.isMappedToReference(character)) { + if (characterEntityReferences.isMappedToReference(character, encoding)) { escaped.append(HtmlCharacterEntityReferences.DECIMAL_REFERENCE_START); escaped.append((int) character); escaped.append(HtmlCharacterEntityReferences.REFERENCE_END); @@ -118,13 +160,33 @@ public abstract class HtmlUtils { * @return the escaped string */ public static String htmlEscapeHex(String input) { + return htmlEscapeHex(input, WebUtils.DEFAULT_CHARACTER_ENCODING); + } + + /** + * Turn special characters into HTML character references. + * Handles complete character set defined in HTML 4.01 recommendation. + *

Escapes all special characters to their corresponding numeric + * reference in hex format (&#xHex;) at least as required by the + * specified encoding. In other words, if a special character does + * not have to be escaped for the given encoding, it may not be. + *

Reference: + * + * http://www.w3.org/TR/html4/sgml/entities.html + * + * @param input the (unescaped) input string + * @param encoding The name of a supported {@link java.nio.charset.Charset charset} + * @return the escaped string + */ + public static String htmlEscapeHex(String input, String encoding) { + Assert.notNull(encoding, "encoding is required"); if (input == null) { return null; } StringBuilder escaped = new StringBuilder(input.length() * 2); for (int i = 0; i < input.length(); i++) { char character = input.charAt(i); - if (characterEntityReferences.isMappedToReference(character)) { + if (characterEntityReferences.isMappedToReference(character, encoding)) { escaped.append(HtmlCharacterEntityReferences.HEX_REFERENCE_START); escaped.append(Integer.toString(character, 16)); escaped.append(HtmlCharacterEntityReferences.REFERENCE_END); diff --git a/spring-web/src/test/java/org/springframework/web/util/HtmlCharacterEntityReferencesTests.java b/spring-web/src/test/java/org/springframework/web/util/HtmlCharacterEntityReferencesTests.java index 2cbe50e680..f0b2c1c999 100644 --- a/spring-web/src/test/java/org/springframework/web/util/HtmlCharacterEntityReferencesTests.java +++ b/spring-web/src/test/java/org/springframework/web/util/HtmlCharacterEntityReferencesTests.java @@ -76,6 +76,20 @@ public class HtmlCharacterEntityReferencesTests { (char) -1, entityReferences.convertToCharacter("invalid")); } + // SPR-9293 + @Test + public void testConvertToReferenceUTF8() { + HtmlCharacterEntityReferences entityReferences = new HtmlCharacterEntityReferences(); + String utf8 = "UTF-8"; + assertEquals("<", entityReferences.convertToReference('<', utf8)); + assertEquals(">", entityReferences.convertToReference('>', utf8)); + assertEquals("&", entityReferences.convertToReference('&', utf8)); + assertEquals(""", entityReferences.convertToReference('"', utf8)); + assertEquals("'", entityReferences.convertToReference('\'', utf8)); + 
assertNull(entityReferences.convertToReference((char) 233, utf8)); + assertNull(entityReferences.convertToReference((char) 934, utf8)); + } + private Map getReferenceCharacterMap() { CharacterEntityResourceIterator entityIterator = new CharacterEntityResourceIterator(); Map referencedCharactersMap = new HashMap(); diff --git a/spring-web/src/test/java/org/springframework/web/util/HtmlUtilsTests.java b/spring-web/src/test/java/org/springframework/web/util/HtmlUtilsTests.java index 26d8f711b3..e752f64a7a 100644 --- a/spring-web/src/test/java/org/springframework/web/util/HtmlUtilsTests.java +++ b/spring-web/src/test/java/org/springframework/web/util/HtmlUtilsTests.java @@ -71,6 +71,28 @@ public class HtmlUtilsTests { "ϑ", HtmlUtils.htmlEscapeDecimal("" + (char) 977)); } + // SPR-9293 + @Test + public void testEncodeIntoHtmlCharacterSetFromUtf8() { + String utf8 = ("UTF-8"); + assertNull("A null string should be converted to a null string", + HtmlUtils.htmlEscape(null, utf8)); + assertEquals("An empty string should be converted to an empty string", + "", HtmlUtils.htmlEscape("", utf8)); + assertEquals("A string containing no special characters should not be affected", + "A sentence containing no special characters.", + HtmlUtils.htmlEscape("A sentence containing no special characters.")); + + assertEquals("'< >' should be encoded to '< >'", + "< >", HtmlUtils.htmlEscape("< >", utf8)); + assertEquals("'< >' should be encoded to '< >'", + "< >", HtmlUtils.htmlEscapeDecimal("< >", utf8)); + + assertEquals("UTF-8 supported chars should not be escaped", + "Μερικοί Ελληνικοί "χαρακτήρες"", + HtmlUtils.htmlEscape("Μερικοί Ελληνικοί \"χαρακτήρες\"", utf8)); + } + @Test public void testDecodeFromHtmlCharacterSet() { assertNull("A null string should be converted to a null string",