Skip to content

Commit e5ebcab

Browse files
committed
Issue ESAPI#394 -- Refactor the URI canonicalization into the Encoder class.
1 parent 22663ed commit e5ebcab

File tree

6 files changed

+196
-187
lines changed

6 files changed

+196
-187
lines changed

src/main/java/org/owasp/esapi/Encoder.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
package org.owasp.esapi;
1717

1818
import java.io.IOException;
19+
import java.net.URI;
1920

2021
import org.owasp.esapi.codecs.Codec;
2122
import org.owasp.esapi.errors.EncodingException;
@@ -513,4 +514,15 @@ public interface Encoder {
513514
*/
514515
byte[] decodeFromBase64(String input) throws IOException;
515516

517+
/**
518+
*
519+
* Get a version of the input URI that will be safe to run regex and other validations against.
520+
* It is not recommended to persist this value as it will transform user input. This method
521+
* will not test to see if the URI is RFC-3986 compliant.
522+
*
523+
* @param dirtyUri the URI whose components are to be canonicalized
524+
* @return a canonicalized string version of the input URI, intended for validation only
525+
*/
526+
public String getCanonicalizedURI(URI dirtyUri);
527+
516528
}

src/main/java/org/owasp/esapi/Validator.java

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -708,17 +708,6 @@ public interface Validator {
708708
*/
709709
boolean isValidURI(String context, String input, boolean allowNull);
710710

711-
/**
712-
*
713-
* Get a version of the input URI that will be safe to run regex and other validations against.
714-
* It is not recommended to persist this value as it will transform user input. This method
715-
* will not test to see if the URI is RFC-3986 compliant.
716-
*
717-
* @param input
718-
* @return
719-
*/
720-
public String getCanonicalizedURI(URI dirtyUri);
721-
722711
/**
723712
* Will return a {@code URI} object that will represent a fully parsed and legal URI
724713
* as specified in RFC-3986.

src/main/java/org/owasp/esapi/reference/DefaultEncoder.java

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,23 @@
1717

1818
import java.io.IOException;
1919
import java.io.UnsupportedEncodingException;
20+
import java.net.URI;
2021
import java.net.URLDecoder;
2122
import java.net.URLEncoder;
2223
import java.util.ArrayList;
24+
import java.util.EnumMap;
2325
import java.util.Iterator;
26+
import java.util.LinkedHashMap;
27+
import java.util.LinkedList;
2428
import java.util.List;
29+
import java.util.Map;
30+
import java.util.Map.Entry;
31+
import java.util.Set;
2532

2633
import org.owasp.esapi.ESAPI;
2734
import org.owasp.esapi.Encoder;
2835
import org.owasp.esapi.Logger;
36+
import org.owasp.esapi.SecurityConfiguration;
2937
import org.owasp.esapi.codecs.Base64;
3038
import org.owasp.esapi.codecs.CSSCodec;
3139
import org.owasp.esapi.codecs.Codec;
@@ -445,4 +453,150 @@ public byte[] decodeFromBase64(String input) throws IOException {
445453
}
446454
return Base64.decode( input );
447455
}
456+
457+
/**
458+
* {@inheritDoc}
459+
*
460+
* This will extract each piece of a URI according to parse zone as specified in <a href="https://www.ietf.org/rfc/rfc3986.txt">RFC-3986</a> section 3,
461+
* and it will construct a canonicalized String representing a version of the URI that is safe to
462+
* run regex against.
463+
*
464+
* @param dirtyUri
465+
* @return Canonicalized URI string.
466+
* @throws IntrusionException
467+
*/
468+
public String getCanonicalizedURI(URI dirtyUri) throws IntrusionException{
469+
470+
// From RFC-3986 section 3
471+
// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
472+
//
473+
// hier-part = "//" authority path-abempty
474+
// / path-absolute
475+
// / path-rootless
476+
// / path-empty
477+
478+
// The following are two example URIs and their component parts:
479+
//
480+
// foo://example.com:8042/over/there?name=ferret#nose
481+
// \_/ \______________/\_________/ \_________/ \__/
482+
// | | | | |
483+
// scheme authority path query fragment
484+
// | _____________________|__
485+
// / \ / \
486+
// urn:example:animal:ferret:nose
487+
Map<UriSegment, String> parseMap = new EnumMap<UriSegment, String>(UriSegment.class);
488+
parseMap.put(UriSegment.SCHEME, dirtyUri.getScheme());
489+
//authority = [ userinfo "@" ] host [ ":" port ]
490+
parseMap.put(UriSegment.AUTHORITY, dirtyUri.getRawAuthority());
491+
parseMap.put(UriSegment.SCHEMSPECIFICPART, dirtyUri.getRawSchemeSpecificPart());
492+
parseMap.put(UriSegment.HOST, dirtyUri.getHost());
493+
//if port is undefined, it will return -1
494+
Integer port = new Integer(dirtyUri.getPort());
495+
parseMap.put(UriSegment.PORT, port == -1 ? "": port.toString());
496+
parseMap.put(UriSegment.PATH, dirtyUri.getRawPath());
497+
parseMap.put(UriSegment.QUERY, dirtyUri.getRawQuery());
498+
parseMap.put(UriSegment.FRAGMENT, dirtyUri.getRawFragment());
499+
500+
//Now we canonicalize each part and build our string.
501+
StringBuilder sb = new StringBuilder();
502+
503+
//Replace all the items in the map with canonicalized versions.
504+
505+
Set<UriSegment> set = parseMap.keySet();
506+
507+
SecurityConfiguration sg = ESAPI.securityConfiguration();
508+
boolean allowMixed = sg.getBooleanProp("Encoder.AllowMixedEncoding");
509+
boolean allowMultiple = sg.getBooleanProp("Encoder.AllowMultipleEncoding");
510+
for(UriSegment seg: set){
511+
String value = canonicalize(parseMap.get(seg), allowMultiple, allowMixed);
512+
value = value == null ? "" : value;
513+
//In the case of a uri query, we need to break up and canonicalize the internal parts of the query.
514+
if(seg == UriSegment.QUERY && null != parseMap.get(seg)){
515+
StringBuilder qBuilder = new StringBuilder();
516+
try {
517+
Map<String, List<String>> canonicalizedMap = this.splitQuery(dirtyUri);
518+
Set<Entry<String, List<String>>> query = canonicalizedMap.entrySet();
519+
Iterator<Entry<String, List<String>>> i = query.iterator();
520+
while(i.hasNext()){
521+
Entry<String, List<String>> e = i.next();
522+
String key = (String) e.getKey();
523+
String qVal = "";
524+
List<String> list = (List<String>) e.getValue();
525+
if(!list.isEmpty()){
526+
qVal = list.get(0);
527+
}
528+
qBuilder.append(key)
529+
.append("=")
530+
.append(qVal);
531+
532+
if(i.hasNext()){
533+
qBuilder.append("&");
534+
}
535+
}
536+
value = qBuilder.toString();
537+
} catch (UnsupportedEncodingException e) {
538+
logger.debug(Logger.EVENT_FAILURE, "decoding error when parsing [" + dirtyUri.toString() + "]");
539+
}
540+
}
541+
//Check if the port is -1, if it is, omit it from the output.
542+
if(seg == UriSegment.PORT){
543+
if("-1" == parseMap.get(seg)){
544+
value = "";
545+
}
546+
}
547+
parseMap.put(seg, value );
548+
}
549+
550+
return buildUrl(parseMap);
551+
}
552+
553+
/**
554+
* All the parts should be canonicalized by this point. This is straightforward assembly.
555+
*
556+
* @param set
557+
* @return
558+
*/
559+
protected String buildUrl(Map<UriSegment, String> parseMap){
560+
StringBuilder sb = new StringBuilder();
561+
sb.append(parseMap.get(UriSegment.SCHEME))
562+
.append("://")
563+
//can't use SCHEMESPECIFICPART for this, because we need to canonicalize all the parts of the query.
564+
//USERINFO is also deprecated. So we technically have more than we need.
565+
.append(parseMap.get(UriSegment.AUTHORITY) == null || parseMap.get(UriSegment.AUTHORITY).equals("") ? "" : parseMap.get(UriSegment.AUTHORITY))
566+
.append(parseMap.get(UriSegment.PATH) == null || parseMap.get(UriSegment.PATH).equals("") ? "" : parseMap.get(UriSegment.PATH))
567+
.append(parseMap.get(UriSegment.QUERY) == null || parseMap.get(UriSegment.QUERY).equals("")
568+
? "" : "?" + parseMap.get(UriSegment.QUERY))
569+
.append((parseMap.get(UriSegment.FRAGMENT) == null) || parseMap.get(UriSegment.FRAGMENT).equals("")
570+
? "": "#" + parseMap.get(UriSegment.FRAGMENT))
571+
;
572+
return sb.toString();
573+
}
574+
575+
/**
 * Keys for the pieces of a URI that are tracked while canonicalizing it.
 * The declaration order is significant to any {@code EnumMap} keyed by this type, because
 * such a map iterates in declaration order.
 */
public enum UriSegment {
    /** Raw authority component: {@code [ userinfo "@" ] host [ ":" port ]}. */
    AUTHORITY,
    /** Scheme, e.g. {@code http}. */
    SCHEME,
    /** Raw scheme-specific part (everything between the scheme and the fragment). */
    SCHEMSPECIFICPART,
    /** User-info portion of the authority. */
    USERINFO,
    /** Host portion of the authority. */
    HOST,
    /** Port portion of the authority. */
    PORT,
    /** Raw path component. */
    PATH,
    /** Raw query component. */
    QUERY,
    /** Raw fragment component. */
    FRAGMENT
}
578+
579+
580+
/**
581+
* The meat of this method was taken from StackOverflow: http://stackoverflow.com/a/13592567/557153
582+
* It has been modified to return a canonicalized key and value pairing.
583+
*
584+
* @param java URI
585+
* @return a map of canonicalized query parameters.
586+
* @throws UnsupportedEncodingException
587+
*/
588+
public Map<String, List<String>> splitQuery(URI uri) throws UnsupportedEncodingException {
589+
final Map<String, List<String>> query_pairs = new LinkedHashMap<String, List<String>>();
590+
final String[] pairs = uri.getQuery().split("&");
591+
for (String pair : pairs) {
592+
final int idx = pair.indexOf("=");
593+
final String key = idx > 0 ? canonicalize(pair.substring(0, idx)) : pair;
594+
if (!query_pairs.containsKey(key)) {
595+
query_pairs.put(key, new LinkedList<String>());
596+
}
597+
final String value = idx > 0 && pair.length() > idx + 1 ? URLDecoder.decode(pair.substring(idx + 1), "UTF-8") : null;
598+
query_pairs.get(key).add(canonicalize(value));
599+
}
600+
return query_pairs;
601+
}
448602
}

src/main/java/org/owasp/esapi/reference/DefaultValidator.java

Lines changed: 2 additions & 148 deletions
Original file line numberDiff line numberDiff line change
@@ -1210,11 +1210,11 @@ private final boolean isEmpty(char[] input) {
12101210
public boolean isValidURI(String context, String input, boolean allowNull) {
12111211
boolean isValid = false;
12121212
boolean inputIsNullOrEmpty = input == null || "".equals(input);
1213-
1213+
Encoder encoder = ESAPI.encoder();
12141214
try{
12151215
URI compliantURI = null == input ? new URI("") : this.getRfcCompliantURI(input);
12161216
if(null != compliantURI && input != null){
1217-
String canonicalizedURI = getCanonicalizedURI(compliantURI);
1217+
String canonicalizedURI = encoder.getCanonicalizedURI(compliantURI);
12181218
//if getCanonicalizedURI doesn't throw an IntrusionException, then the URI contains no mixed or
12191219
//double-encoding attacks.
12201220
logger.debug(Logger.SECURITY_SUCCESS, "We did not detect any mixed or multiple encoding in the uri:[" + input + "]");
@@ -1259,150 +1259,4 @@ public URI getRfcCompliantURI(String input){
12591259
}
12601260
return rval;
12611261
}
1262-
1263-
/**
1264-
* {@inheritDoc}
1265-
*
1266-
* This will extract each piece of a URI according to parse zone as specified in <a href="https://www.ietf.org/rfc/rfc3986.txt">RFC-3986</a> section 3,
1267-
* and it will construct a canonicalized String representing a version of the URI that is safe to
1268-
* run regex against.
1269-
*
1270-
* @param dirtyUri
1271-
* @return Canonicalized URI string.
1272-
* @throws IntrusionException
1273-
*/
1274-
public String getCanonicalizedURI(URI dirtyUri) throws IntrusionException{
1275-
1276-
// From RFC-3986 section 3
1277-
// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
1278-
//
1279-
// hier-part = "//" authority path-abempty
1280-
// / path-absolute
1281-
// / path-rootless
1282-
// / path-empty
1283-
1284-
// The following are two example URIs and their component parts:
1285-
//
1286-
// foo://example.com:8042/over/there?name=ferret#nose
1287-
// \_/ \______________/\_________/ \_________/ \__/
1288-
// | | | | |
1289-
// scheme authority path query fragment
1290-
// | _____________________|__
1291-
// / \ / \
1292-
// urn:example:animal:ferret:nose
1293-
Map<UriSegment, String> parseMap = new EnumMap<UriSegment, String>(UriSegment.class);
1294-
parseMap.put(UriSegment.SCHEME, dirtyUri.getScheme());
1295-
//authority = [ userinfo "@" ] host [ ":" port ]
1296-
parseMap.put(UriSegment.AUTHORITY, dirtyUri.getRawAuthority());
1297-
parseMap.put(UriSegment.SCHEMSPECIFICPART, dirtyUri.getRawSchemeSpecificPart());
1298-
parseMap.put(UriSegment.HOST, dirtyUri.getHost());
1299-
//if port is undefined, it will return -1
1300-
Integer port = new Integer(dirtyUri.getPort());
1301-
parseMap.put(UriSegment.PORT, port == -1 ? "": port.toString());
1302-
parseMap.put(UriSegment.PATH, dirtyUri.getRawPath());
1303-
parseMap.put(UriSegment.QUERY, dirtyUri.getRawQuery());
1304-
parseMap.put(UriSegment.FRAGMENT, dirtyUri.getRawFragment());
1305-
1306-
//Now we canonicalize each part and build our string.
1307-
StringBuilder sb = new StringBuilder();
1308-
1309-
//Replace all the items in the map with canonicalized versions.
1310-
1311-
Set<UriSegment> set = parseMap.keySet();
1312-
1313-
SecurityConfiguration sg = ESAPI.securityConfiguration();
1314-
boolean allowMixed = sg.getBooleanProp("Encoder.AllowMixedEncoding");
1315-
boolean allowMultiple = sg.getBooleanProp("Encoder.AllowMultipleEncoding");
1316-
for(UriSegment seg: set){
1317-
String value = encoder.canonicalize(parseMap.get(seg), allowMultiple, allowMixed);
1318-
value = value == null ? "" : value;
1319-
//In the case of a uri query, we need to break up and canonicalize the internal parts of the query.
1320-
if(seg == UriSegment.QUERY && null != parseMap.get(seg)){
1321-
StringBuilder qBuilder = new StringBuilder();
1322-
try {
1323-
Map<String, List<String>> canonicalizedMap = this.splitQuery(dirtyUri);
1324-
Set<Entry<String, List<String>>> query = canonicalizedMap.entrySet();
1325-
Iterator<Entry<String, List<String>>> i = query.iterator();
1326-
while(i.hasNext()){
1327-
Entry<String, List<String>> e = i.next();
1328-
String key = (String) e.getKey();
1329-
String qVal = "";
1330-
List<String> list = (List<String>) e.getValue();
1331-
if(!list.isEmpty()){
1332-
qVal = list.get(0);
1333-
}
1334-
qBuilder.append(key)
1335-
.append("=")
1336-
.append(qVal);
1337-
1338-
if(i.hasNext()){
1339-
qBuilder.append("&");
1340-
}
1341-
}
1342-
value = qBuilder.toString();
1343-
} catch (UnsupportedEncodingException e) {
1344-
logger.debug(Logger.EVENT_FAILURE, "decoding error when parsing [" + dirtyUri.toString() + "]");
1345-
}
1346-
}
1347-
//Check if the port is -1, if it is, omit it from the output.
1348-
if(seg == UriSegment.PORT){
1349-
if("-1" == parseMap.get(seg)){
1350-
value = "";
1351-
}
1352-
}
1353-
parseMap.put(seg, value );
1354-
}
1355-
1356-
return buildUrl(parseMap);
1357-
}
1358-
1359-
/**
1360-
* The meat of this method was taken from StackOverflow: http://stackoverflow.com/a/13592567/557153
1361-
* It has been modified to return a canonicalized key and value pairing.
1362-
*
1363-
* @param java URI
1364-
* @return a map of canonicalized query parameters.
1365-
* @throws UnsupportedEncodingException
1366-
*/
1367-
public Map<String, List<String>> splitQuery(URI uri) throws UnsupportedEncodingException {
1368-
final Map<String, List<String>> query_pairs = new LinkedHashMap<String, List<String>>();
1369-
final String[] pairs = uri.getQuery().split("&");
1370-
for (String pair : pairs) {
1371-
final int idx = pair.indexOf("=");
1372-
final String key = idx > 0 ? encoder.canonicalize(pair.substring(0, idx)) : pair;
1373-
if (!query_pairs.containsKey(key)) {
1374-
query_pairs.put(key, new LinkedList<String>());
1375-
}
1376-
final String value = idx > 0 && pair.length() > idx + 1 ? URLDecoder.decode(pair.substring(idx + 1), "UTF-8") : null;
1377-
query_pairs.get(key).add(encoder.canonicalize(value));
1378-
}
1379-
return query_pairs;
1380-
}
1381-
1382-
public enum UriSegment {
1383-
AUTHORITY, SCHEME, SCHEMSPECIFICPART, USERINFO, HOST, PORT, PATH, QUERY, FRAGMENT
1384-
}
1385-
1386-
/**
1387-
* All the parts should be canonicalized by this point. This is straightforward assembly.
1388-
*
1389-
* @param set
1390-
* @return
1391-
*/
1392-
protected String buildUrl(Map<UriSegment, String> parseMap){
1393-
StringBuilder sb = new StringBuilder();
1394-
sb.append(parseMap.get(UriSegment.SCHEME))
1395-
.append("://")
1396-
//can't use SCHEMESPECIFICPART for this, because we need to canonicalize all the parts of the query.
1397-
//USERINFO is also deprecated. So we technically have more than we need.
1398-
.append(parseMap.get(UriSegment.AUTHORITY) == null || parseMap.get(UriSegment.AUTHORITY).equals("") ? "" : parseMap.get(UriSegment.AUTHORITY))
1399-
.append(parseMap.get(UriSegment.PATH) == null || parseMap.get(UriSegment.PATH).equals("") ? "" : parseMap.get(UriSegment.PATH))
1400-
.append(parseMap.get(UriSegment.QUERY) == null || parseMap.get(UriSegment.QUERY).equals("")
1401-
? "" : "?" + parseMap.get(UriSegment.QUERY))
1402-
.append((parseMap.get(UriSegment.FRAGMENT) == null) || parseMap.get(UriSegment.FRAGMENT).equals("")
1403-
? "": "#" + parseMap.get(UriSegment.FRAGMENT))
1404-
;
1405-
return sb.toString();
1406-
}
1407-
14081262
}

0 commit comments

Comments
 (0)