Skip to content

Commit aafa353

Browse files
committed
Issue ESAPI#376 create an alternative API to validate URLs, to thwart ReDoS.
1 parent 32f5e84 commit aafa353

File tree

5 files changed

+251
-17
lines changed

5 files changed

+251
-17
lines changed

src/main/java/org/owasp/esapi/Validator.java

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
import java.io.File;
1919
import java.io.InputStream;
20+
import java.net.URI;
2021
import java.text.DateFormat;
2122
import java.util.Date;
2223
import java.util.List;
@@ -687,5 +688,42 @@ public interface Validator {
687688
*/
688689
String safeReadLine(InputStream inputStream, int maxLength) throws ValidationException;
689690

690-
}
691+
/**
692+
*
693+
* Parses and ensures that the URI in question is a valid RFC-3986 URI. This simplifies
694+
* the kind of regex required for subsequent validation to mitigate regex-based
695+
* DoS attacks.
696+
*
697+
* @param context
698+
* A descriptive name of the parameter that you are validating (e.g., LoginPage_UsernameField). This value is used by any logging or error handling that is done with respect to the value passed in.
699+
* @param input
700+
* The URI string to validate.
701+
* @param allowNull
702+
* If allowNull is true then an input that is NULL or an empty string will be legal. If allowNull is false then NULL or an empty String will throw a ValidationException.
703+
*
704+
* @return {@code true} if the input is considered a valid URI, {@code false} otherwise.
705+
* @throws ValidationException NOTE(review): not declared in the signature below; confirm whether this tag is still intended.
706+
*/
707+
boolean isValidURI(String context, String input, boolean allowNull);
691708

709+
/**
710+
*
711+
* Get a version of the input URI that will be safe to run regex and other validations against.
712+
* It is not recommended to persist this value as it will transform user input. This method
713+
* will not test to see if the URI is RFC-3986 compliant.
714+
*
715+
* @param dirtyUri the already-parsed URI to canonicalize
716+
* @return the canonicalized string form of {@code dirtyUri}
717+
*/
718+
public String getCanonicalizedURI(URI dirtyUri);
719+
720+
/**
721+
* Will return a {@code URI} object that will represent a fully parsed and legal URI
722+
* as specified in RFC-3986.
723+
*
724+
* @param input the string to parse as a URI
725+
* @return URI object representing a parsed URI, or {@code null} if the URI was non-compliant in some way.
726+
*/
727+
public URI getRfcCompliantURI(String input);
728+
729+
}

src/main/java/org/owasp/esapi/reference/DefaultValidator.java

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,21 +20,31 @@
2020
import java.io.File;
2121
import java.io.IOException;
2222
import java.io.InputStream;
23+
import java.io.UnsupportedEncodingException;
24+
import java.net.URI;
25+
import java.net.URISyntaxException;
26+
import java.net.URLDecoder;
2327
import java.text.DateFormat;
2428
import java.util.ArrayList;
2529
import java.util.Date;
30+
import java.util.EnumMap;
2631
import java.util.HashMap;
2732
import java.util.HashSet;
2833
import java.util.Iterator;
34+
import java.util.LinkedHashMap;
35+
import java.util.LinkedList;
2936
import java.util.List;
3037
import java.util.Map;
38+
import java.util.Map.Entry;
3139
import java.util.Set;
3240
import java.util.regex.Pattern;
3341

3442
import javax.servlet.http.HttpServletRequest;
3543

3644
import org.owasp.esapi.ESAPI;
3745
import org.owasp.esapi.Encoder;
46+
import org.owasp.esapi.Logger;
47+
import org.owasp.esapi.SecurityConfiguration;
3848
import org.owasp.esapi.ValidationErrorList;
3949
import org.owasp.esapi.ValidationRule;
4050
import org.owasp.esapi.Validator;
@@ -61,6 +71,7 @@
6171
* @see org.owasp.esapi.Validator
6272
*/
6373
public class DefaultValidator implements org.owasp.esapi.Validator {
74+
private static Logger logger = ESAPI.log();
6475
private static volatile Validator instance = null;
6576

6677
public static Validator getInstance() {
@@ -1191,4 +1202,188 @@ private final boolean isEmpty(byte[] input) {
11911202
private final boolean isEmpty(char[] input) {
11921203
return (input==null || input.length == 0);
11931204
}
1205+
1206+
/**
1207+
* {@inheritDoc}
1208+
*/
1209+
public boolean isValidURI(String context, String input, boolean allowNull) {
1210+
boolean isValid = false;
1211+
URI compliantURI = this.getRfcCompliantURI(input);
1212+
1213+
try{
1214+
if(null != compliantURI){
1215+
String canonicalizedURI = getCanonicalizedURI(compliantURI);
1216+
//if getCanonicalizedURI doesn't throw an IntrusionException, then the URI contains no mixed or
1217+
//double-encoding attacks.
1218+
logger.info(Logger.SECURITY_SUCCESS, "We did not detect any mixed or multiple encoding in the uri:[" + input + "]");
1219+
Validator v = ESAPI.validator();
1220+
//This part will use the regex from validation.properties. This regex should be super-simple, and
1221+
//used mainly to restrict certain parts of a URL.
1222+
Pattern p = ESAPI.securityConfiguration().getValidationPattern( "URL" );
1223+
//We're doing this instead of using the normal validator API, because it will canonicalize the input again
1224+
//and if the URI has any queries that also happen to match HTML entities, like ¶
1225+
//it will cease conforming to the regex we now specify for a URL.
1226+
isValid = p.matcher(canonicalizedURI).matches();
1227+
}
1228+
1229+
}catch (IntrusionException e){
1230+
logger.error(Logger.SECURITY_FAILURE, e.getMessage());
1231+
isValid = false;
1232+
}
1233+
1234+
1235+
return isValid;
1236+
}
1237+
1238+
/**
1239+
* {@inheritDoc}
1240+
*/
1241+
public URI getRfcCompliantURI(String input){
1242+
URI rval = null;
1243+
try {
1244+
rval = new URI(input);
1245+
} catch (URISyntaxException e) {
1246+
logger.error(Logger.EVENT_FAILURE, e.getMessage());
1247+
}
1248+
return rval;
1249+
}
1250+
1251+
/**
1252+
* This does alot. This will extract each piece of a URI according to parse zone, and it will construct
1253+
* a canonicalized String representing a version of the URI that is safe to run regex against to it.
1254+
*
1255+
* @param dirtyUri
1256+
* @return
1257+
* @throws IntrusionException
1258+
*/
1259+
public String getCanonicalizedURI(URI dirtyUri) throws IntrusionException{
1260+
1261+
// From RFC-3986 section 3
1262+
// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
1263+
//
1264+
// hier-part = "//" authority path-abempty
1265+
// / path-absolute
1266+
// / path-rootless
1267+
// / path-empty
1268+
1269+
// The following are two example URIs and their component parts:
1270+
//
1271+
// foo://example.com:8042/over/there?name=ferret#nose
1272+
// \_/ \______________/\_________/ \_________/ \__/
1273+
// | | | | |
1274+
// scheme authority path query fragment
1275+
// | _____________________|__
1276+
// / \ / \
1277+
// urn:example:animal:ferret:nose
1278+
Map<UriSegment, String> parseMap = new EnumMap<UriSegment, String>(UriSegment.class);
1279+
parseMap.put(UriSegment.SCHEME, dirtyUri.getScheme());
1280+
//authority = [ userinfo "@" ] host [ ":" port ]
1281+
parseMap.put(UriSegment.AUTHORITY, dirtyUri.getRawAuthority());
1282+
parseMap.put(UriSegment.SCHEMSPECIFICPART, dirtyUri.getRawSchemeSpecificPart());
1283+
parseMap.put(UriSegment.HOST, dirtyUri.getHost());
1284+
//if port is undefined, it will return -1
1285+
Integer port = new Integer(dirtyUri.getPort());
1286+
parseMap.put(UriSegment.PORT, port == -1 ? "": port.toString());
1287+
parseMap.put(UriSegment.PATH, dirtyUri.getRawPath());
1288+
parseMap.put(UriSegment.QUERY, dirtyUri.getRawQuery());
1289+
parseMap.put(UriSegment.FRAGMENT, dirtyUri.getRawFragment());
1290+
1291+
//Now we canonicalize each part and build our string.
1292+
StringBuilder sb = new StringBuilder();
1293+
1294+
//Replace all the items in the map with canonicalized versions.
1295+
1296+
Set<UriSegment> set = parseMap.keySet();
1297+
1298+
SecurityConfiguration sg = ESAPI.securityConfiguration();
1299+
// boolean restrictMixed = sg.getBooleanProp("AllowMixedEncoding");
1300+
// boolean restrictMultiple = sg.getBooleanProp("AllowMultipleEncoding");
1301+
boolean allowMixed = sg.getAllowMixedEncoding();
1302+
boolean allowMultiple = sg.getAllowMultipleEncoding();
1303+
for(UriSegment seg: set){
1304+
String value = encoder.canonicalize(parseMap.get(seg), allowMultiple, allowMixed);
1305+
value = value == null ? "" : value;
1306+
//In the case of a uri query, we need to break up and canonicalize the internal parts of the query.
1307+
if(seg == UriSegment.QUERY && null != parseMap.get(seg)){
1308+
StringBuilder qBuilder = new StringBuilder();
1309+
try {
1310+
Map<String, List<String>> canonicalizedMap = this.splitQuery(dirtyUri);
1311+
Set<Entry<String, List<String>>> query = canonicalizedMap.entrySet();
1312+
Iterator<Entry<String, List<String>>> i = query.iterator();
1313+
while(i.hasNext()){
1314+
Entry<String, List<String>> e = i.next();
1315+
String key = (String) e.getKey();
1316+
String qVal = "";
1317+
List<String> list = (List<String>) e.getValue();
1318+
if(!list.isEmpty()){
1319+
qVal = list.get(0);
1320+
}
1321+
qBuilder.append(key)
1322+
.append("=")
1323+
.append(qVal);
1324+
1325+
if(i.hasNext()){
1326+
qBuilder.append("&");
1327+
}
1328+
}
1329+
value = qBuilder.toString();
1330+
} catch (UnsupportedEncodingException e) {
1331+
logger.debug(Logger.EVENT_FAILURE, "decoding error when parsing [" + dirtyUri.toString() + "]");
1332+
}
1333+
}
1334+
parseMap.put(seg, value );
1335+
}
1336+
1337+
return buildUrl(parseMap);
1338+
}
1339+
1340+
/**
1341+
* The meat of this method was taken from StackOverflow: http://stackoverflow.com/a/13592567/557153
1342+
* It has been modified to return a canonicalized key and value pairing.
1343+
*
1344+
* @param java URI
1345+
* @return a map of canonicalized query parameters.
1346+
* @throws UnsupportedEncodingException
1347+
*/
1348+
public Map<String, List<String>> splitQuery(URI uri) throws UnsupportedEncodingException {
1349+
final Map<String, List<String>> query_pairs = new LinkedHashMap<String, List<String>>();
1350+
final String[] pairs = uri.getQuery().split("&");
1351+
for (String pair : pairs) {
1352+
final int idx = pair.indexOf("=");
1353+
final String key = idx > 0 ? encoder.canonicalize(pair.substring(0, idx)) : pair;
1354+
if (!query_pairs.containsKey(key)) {
1355+
query_pairs.put(key, new LinkedList<String>());
1356+
}
1357+
final String value = idx > 0 && pair.length() > idx + 1 ? URLDecoder.decode(pair.substring(idx + 1), "UTF-8") : null;
1358+
query_pairs.get(key).add(encoder.canonicalize(value));
1359+
}
1360+
return query_pairs;
1361+
}
1362+
1363+
/**
 * Names the individual pieces a URI is broken into before canonicalization.
 * The declaration order is significant to any {@code EnumMap} or {@code ordinal()}
 * based consumer and must not be rearranged.
 */
public enum UriSegment {
    AUTHORITY,
    SCHEME,
    SCHEMSPECIFICPART,
    USERINFO,
    HOST,
    PORT,
    PATH,
    QUERY,
    FRAGMENT
}
1366+
1367+
/**
1368+
* All the parts should be canonicalized by this point. This is straightforward assembly.
1369+
*
1370+
* @param set
1371+
* @return
1372+
*/
1373+
protected String buildUrl(Map<UriSegment, String> parseMap){
1374+
StringBuilder sb = new StringBuilder();
1375+
sb.append(parseMap.get(UriSegment.SCHEME))
1376+
.append("://")
1377+
//can't use SCHEMESPECIFICPART for this, because we need to canonicalize all the parts of the query.
1378+
//USERINFO is also deprecated. So we technically have more than we need.
1379+
.append(parseMap.get(UriSegment.AUTHORITY) == null || parseMap.get(UriSegment.AUTHORITY).equals("") ? "" : parseMap.get(UriSegment.AUTHORITY))
1380+
.append(parseMap.get(UriSegment.PATH) == null || parseMap.get(UriSegment.PATH).equals("") ? "" : parseMap.get(UriSegment.PATH))
1381+
.append(parseMap.get(UriSegment.QUERY) == null || parseMap.get(UriSegment.QUERY).equals("")
1382+
? "" : "?" + parseMap.get(UriSegment.QUERY))
1383+
.append((parseMap.get(UriSegment.FRAGMENT) == null) || parseMap.get(UriSegment.FRAGMENT).equals("")
1384+
? "": "#" + parseMap.get(UriSegment.FRAGMENT))
1385+
;
1386+
return sb.toString();
1387+
}
1388+
11941389
}

src/test/java/org/owasp/esapi/reference/ValidatorTest.java

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -621,34 +621,35 @@ public void testisValidInput() {
621621
assertTrue(errors.size()==4);
622622
assertTrue(instance.isValidInput("test10", "http://www.aspectsecurity.com", "URL", 100, false, errors));
623623
assertTrue(errors.size()==4);
624-
assertFalse(instance.isValidInput("test11", "http:///www.aspectsecurity.com", "URL", 100, false, errors));
625-
assertTrue(errors.size()==5);
624+
// This is getting flipped to true because it is no longer the validator regex's job to enforce URL structure.
625+
assertTrue(instance.isValidInput("test11", "http:///www.aspectsecurity.com", "URL", 100, false, errors));
626+
assertTrue(errors.size()==4);
626627
assertFalse(instance.isValidInput("test12", "http://www.aspect security.com", "URL", 100, false, errors));
627-
assertTrue(errors.size()==6);
628+
assertTrue(errors.size()==5);
628629
assertTrue(instance.isValidInput("test13", "078-05-1120", "SSN", 100, false, errors));
629-
assertTrue(errors.size()==6);
630+
assertTrue(errors.size()==5);
630631
assertTrue(instance.isValidInput("test14", "078 05 1120", "SSN", 100, false, errors));
631-
assertTrue(errors.size()==6);
632+
assertTrue(errors.size()==5);
632633
assertTrue(instance.isValidInput("test15", "078051120", "SSN", 100, false, errors));
633-
assertTrue(errors.size()==6);
634+
assertTrue(errors.size()==5);
634635
assertFalse(instance.isValidInput("test16", "987-65-4320", "SSN", 100, false, errors));
635-
assertTrue(errors.size()==7);
636+
assertTrue(errors.size()==6);
636637
assertFalse(instance.isValidInput("test17", "000-00-0000", "SSN", 100, false, errors));
637-
assertTrue(errors.size()==8);
638+
assertTrue(errors.size()==7);
638639
assertFalse(instance.isValidInput("test18", "(555) 555-5555", "SSN", 100, false, errors));
639-
assertTrue(errors.size()==9);
640+
assertTrue(errors.size()==8);
640641
assertFalse(instance.isValidInput("test19", "test", "SSN", 100, false, errors));
641-
assertTrue(errors.size()==10);
642+
assertTrue(errors.size()==9);
642643
assertTrue(instance.isValidInput("test20", "jeffWILLIAMS123", "HTTPParameterValue", 100, false, errors));
643-
assertTrue(errors.size()==10);
644+
assertTrue(errors.size()==9);
644645
assertTrue(instance.isValidInput("test21", "jeff .-/+=@_ WILLIAMS", "HTTPParameterValue", 100, false, errors));
645-
assertTrue(errors.size()==10);
646+
assertTrue(errors.size()==9);
646647
// Removed per Issue 116 - The '*' character is valid as a parameter character
647648
// assertFalse(instance.isValidInput("test", "jeff*WILLIAMS", "HTTPParameterValue", 100, false));
648649
assertFalse(instance.isValidInput("test22", "jeff^WILLIAMS", "HTTPParameterValue", 100, false, errors));
649-
assertTrue(errors.size()==11);
650+
assertTrue(errors.size()==10);
650651
assertFalse(instance.isValidInput("test23", "jeff\\WILLIAMS", "HTTPParameterValue", 100, false, errors));
651-
assertTrue(errors.size()==12);
652+
assertTrue(errors.size()==11);
652653

653654
assertTrue(instance.isValidInput("test", null, "Email", 100, true, errors));
654655
assertFalse(instance.isValidInput("test", null, "Email", 100, false, errors));

src/test/resources/esapi/validation.properties

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ Validator.Email=^[A-Za-z0-9._%'-]+@[A-Za-z0-9.-]+\\.[a-zA-Z]{2,62}$
2727
Validator.Gmail=^[A-Za-z0-9._%'-+]+@[A-Za-z0-9.-]+\\.[a-zA-Z]{2,62}$
2828
Validator.IPAddress=^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$
2929
#Validator.URL=^(?:ht|f)tp(s?+)\\:\\/\\/[0-9a-zA-Z](?:[-.\\w]*[0-9a-zA-Z])*(?::(?:0-9)*)*(?:\\/?+)(?:[a-zA-Z0-9\\-\\.\\?\\,\\:\\'\\/\\\\\\+=&amp;%\\$#_]*)?+$
30-
Validator.URL=^(?:ht|f)tp(?:s?)(?:[\\p{Print}]*)$
30+
Validator.URL=^(?:ht|f)tp(?:s?)(?:[:A-Za-z0-9%/#?&.=-]*)$
3131
Validator.CreditCard=^(\\d{4}[- ]?){3}\\d{4}$
3232
Validator.SSN=^(?!000)([0-6]\\d{2}|7([0-6]\\d|7[012]))([ -]?)(?!00)\\d\\d\\3(?!0000)\\d{4}$
3333

0 commit comments

Comments
 (0)