Skip to content

Commit 660054d

Browse files
committed
Issue ESAPI#300 -- Solved the root problem, now have many unit tests to clean up.
1 parent 5e4e201 commit 660054d

File tree

3 files changed

+98
-4
lines changed

3 files changed

+98
-4
lines changed

src/main/java/org/owasp/esapi/codecs/Codec.java

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,15 @@ public Codec() {
6464
*/
6565
// Encodes every character of input and returns the concatenated result.
// NOTE(review): this listing appears to be a rendered diff; the statements that
// follow the `67-`/`68-`/`69-` markers look like the removed char-by-char loop and
// those after `NN+` markers the replacement loop -- confirm against the committed file.
public String encode(char[] immune, String input) {
6666
StringBuilder sb = new StringBuilder();
67-
for (int i = 0; i < input.length(); i++) {
68-
char c = input.charAt(i);
69-
sb.append(encodeCharacter(immune, c));
// Replacement loop: walks input by Unicode code point instead of by char,
// so a surrogate pair is read as one value via codePointAt.
67+
for(int offset = 0; offset < input.length(); ){
68+
final int point = input.codePointAt(offset);
69+
if(Character.isBmpCodePoint(point)){
70+
//We can then safely cast this to char and maintain legacy behavior.
71+
sb.append(encodeCharacter(immune, (char) point));
// Non-BMP code points are passed as int to the code-point overload.
72+
}else{
73+
sb.append(encodeCharacter(immune, point));
74+
}
// Advance by the number of chars the code point occupies (Character.charCount).
75+
offset += Character.charCount(point);
7076
}
7177
return sb.toString();
7278
}
@@ -83,6 +89,19 @@ public String encode(char[] immune, String input) {
8389
public String encodeCharacter( char[] immune, Character c ) {
8490
return ""+c;
8591
}
92+
93+
/**
94+
* Default codepoint implementation that should be overridden in specific codecs.
95+
*
96+
* @param immune
97+
* @param codePoint
98+
* the integer to encode
99+
* @return
100+
* the encoded Character
101+
*/
102+
public String encodeCharacter( char[] immune, int codePoint ) {
103+
return new StringBuilder().appendCodePoint(codePoint).toString();
104+
}
86105

87106
/**
88107
* Decode a String that was encoded using the encode method in this Class
@@ -131,6 +150,19 @@ public static String getHexForNonAlphanumeric(char c)
131150
return hex[c];
132151
return toHex(c);
133152
}
153+
154+
/**
155+
* Lookup the hex value of any character that is not alphanumeric.
156+
* @param c The character to lookup.
157+
* @return, return null if alphanumeric or the character code
158+
* in hex.
159+
*/
160+
public static String getHexForNonAlphanumeric(int c)
161+
{
162+
if(c<0xFF)
163+
return hex[c];
164+
return toHex(c);
165+
}
134166

135167
public static String toOctal(char c)
136168
{
@@ -141,6 +173,11 @@ public static String toHex(char c)
141173
{
142174
return Integer.toHexString(c);
143175
}
176+
177+
public static String toHex(int c)
178+
{
179+
return Integer.toHexString(c);
180+
}
144181

145182
/**
146183
* Utility to search a char[] for a specific char.

src/main/java/org/owasp/esapi/codecs/HTMLEntityCodec.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,42 @@ public String encodeCharacter( char[] immune, Character c ) {
7878
return "&#x" + hex + ";";
7979
}
8080

81+
/**
82+
* {@inheritDoc}
83+
*
84+
* Encodes a Character for safe use in an HTML entity field.
85+
* @param immune
86+
*/
87+
public String encodeCharacter( char[] immune, int codePoint ) {
88+
89+
// check for immune characters
90+
// if ( containsCharacter(codePoint, immune ) ) {
91+
// return ""+codePoint;
92+
// }
93+
94+
// // check for alphanumeric characters
95+
String hex = Codec.getHexForNonAlphanumeric(codePoint);
96+
// if ( hex == null ) {
97+
// return ""+c;
98+
// }
99+
//
100+
// // check for illegal characters
101+
// if ( ( c <= 0x1f && c != '\t' && c != '\n' && c != '\r' ) || ( c >= 0x7f && c <= 0x9f ) )
102+
// {
103+
// hex = REPLACEMENT_HEX; // Let's entity encode this instead of returning it
104+
// c = REPLACEMENT_CHAR;
105+
// }
106+
//
107+
// // check if there's a defined entity
108+
// String entityName = (String) characterToEntityMap.get(c);
109+
// if (entityName != null) {
110+
// return "&" + entityName + ";";
111+
// }
112+
113+
// return the hex entity as suggested in the spec
114+
return "&#x" + hex + ";";
115+
}
116+
81117
/**
82118
* {@inheritDoc}
83119
*

src/test/java/org/owasp/esapi/reference/EncoderTest.java

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import org.owasp.esapi.EncoderConstants;
3333
import org.owasp.esapi.codecs.Base64;
3434
import org.owasp.esapi.codecs.Codec;
35+
import org.owasp.esapi.codecs.HTMLEntityCodec;
3536
import org.owasp.esapi.codecs.MySQLCodec;
3637
import org.owasp.esapi.codecs.OracleCodec;
3738
import org.owasp.esapi.codecs.PushbackString;
@@ -902,7 +903,27 @@ public void testGetCanonicalizedUriWithMailto() throws Exception {
902903
URI uri = new URI(input);
903904
System.out.println(uri.toString());
904905
assertEquals(expectedUri, e.getCanonicalizedURI(uri));
905-
906+
}
907+
908+
public void testHtmlEncodeStrSurrogatePair()
909+
{
910+
Encoder enc = ESAPI.encoder();
911+
String inStr = new String (new int[]{0x2f804}, 0, 1);
912+
assertEquals(false, Character.isBmpCodePoint(inStr.codePointAt(0)));
913+
assertEquals(true, Character.isBmpCodePoint(new String(new int[] {0x0a}, 0, 1).codePointAt(0)));
914+
String expected = "&#x2f804;";
915+
String result;
916+
917+
result = enc.encodeForHTML(inStr);
918+
assertEquals(expected, result);
919+
}
920+
921+
public void testHtmlDecodeHexEntititesSurrogatePair()
922+
{
923+
HTMLEntityCodec htmlCodec = new HTMLEntityCodec();
924+
String expected = new String (new int[]{0x2f804}, 0, 1);
925+
assertEquals( expected, htmlCodec.decode("&#194564;") );
926+
assertEquals( expected, htmlCodec.decode("&#x2f804;") );
906927
}
907928
}
908929

0 commit comments

Comments
 (0)