|
17 | 17 |
|
18 | 18 | import java.io.IOException;
|
19 | 19 | import java.io.UnsupportedEncodingException;
|
| 20 | +import java.net.URI; |
20 | 21 | import java.net.URLDecoder;
|
21 | 22 | import java.net.URLEncoder;
|
22 | 23 | import java.util.ArrayList;
|
| 24 | +import java.util.EnumMap; |
23 | 25 | import java.util.Iterator;
|
| 26 | +import java.util.LinkedHashMap; |
| 27 | +import java.util.LinkedList; |
24 | 28 | import java.util.List;
|
| 29 | +import java.util.Map; |
| 30 | +import java.util.Map.Entry; |
| 31 | +import java.util.Set; |
25 | 32 |
|
26 | 33 | import org.owasp.esapi.ESAPI;
|
27 | 34 | import org.owasp.esapi.Encoder;
|
28 | 35 | import org.owasp.esapi.Logger;
|
| 36 | +import org.owasp.esapi.SecurityConfiguration; |
29 | 37 | import org.owasp.esapi.codecs.Base64;
|
30 | 38 | import org.owasp.esapi.codecs.CSSCodec;
|
31 | 39 | import org.owasp.esapi.codecs.Codec;
|
@@ -452,4 +460,150 @@ public byte[] decodeFromBase64(String input) throws IOException {
|
452 | 460 | }
|
453 | 461 | return Base64.decode( input );
|
454 | 462 | }
|
| 463 | + |
| 464 | + /** |
| 465 | + * {@inheritDoc} |
| 466 | + * |
| 467 | + * This will extract each piece of a URI according to parse zone as specified in <a href="https://www.ietf.org/rfc/rfc3986.txt">RFC-3986</a> section 3, |
| 468 | + * and it will construct a canonicalized String representing a version of the URI that is safe to |
| 469 | + * run regex against. |
| 470 | + * |
| 471 | + * @param dirtyUri |
| 472 | + * @return Canonicalized URI string. |
| 473 | + * @throws IntrusionException |
| 474 | + */ |
| 475 | + public String getCanonicalizedURI(URI dirtyUri) throws IntrusionException{ |
| 476 | + |
| 477 | +// From RFC-3986 section 3 |
| 478 | +// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] |
| 479 | +// |
| 480 | +// hier-part = "//" authority path-abempty |
| 481 | +// / path-absolute |
| 482 | +// / path-rootless |
| 483 | +// / path-empty |
| 484 | + |
| 485 | +// The following are two example URIs and their component parts: |
| 486 | +// |
| 487 | +// foo://example.com:8042/over/there?name=ferret#nose |
| 488 | +// \_/ \______________/\_________/ \_________/ \__/ |
| 489 | +// | | | | | |
| 490 | +// scheme authority path query fragment |
| 491 | +// | _____________________|__ |
| 492 | +// / \ / \ |
| 493 | +// urn:example:animal:ferret:nose |
| 494 | + Map<UriSegment, String> parseMap = new EnumMap<UriSegment, String>(UriSegment.class); |
| 495 | + parseMap.put(UriSegment.SCHEME, dirtyUri.getScheme()); |
| 496 | + //authority = [ userinfo "@" ] host [ ":" port ] |
| 497 | + parseMap.put(UriSegment.AUTHORITY, dirtyUri.getRawAuthority()); |
| 498 | + parseMap.put(UriSegment.SCHEMSPECIFICPART, dirtyUri.getRawSchemeSpecificPart()); |
| 499 | + parseMap.put(UriSegment.HOST, dirtyUri.getHost()); |
| 500 | + //if port is undefined, it will return -1 |
| 501 | + Integer port = new Integer(dirtyUri.getPort()); |
| 502 | + parseMap.put(UriSegment.PORT, port == -1 ? "": port.toString()); |
| 503 | + parseMap.put(UriSegment.PATH, dirtyUri.getRawPath()); |
| 504 | + parseMap.put(UriSegment.QUERY, dirtyUri.getRawQuery()); |
| 505 | + parseMap.put(UriSegment.FRAGMENT, dirtyUri.getRawFragment()); |
| 506 | + |
| 507 | + //Now we canonicalize each part and build our string. |
| 508 | + StringBuilder sb = new StringBuilder(); |
| 509 | + |
| 510 | + //Replace all the items in the map with canonicalized versions. |
| 511 | + |
| 512 | + Set<UriSegment> set = parseMap.keySet(); |
| 513 | + |
| 514 | + SecurityConfiguration sg = ESAPI.securityConfiguration(); |
| 515 | + boolean allowMixed = sg.getBooleanProp("Encoder.AllowMixedEncoding"); |
| 516 | + boolean allowMultiple = sg.getBooleanProp("Encoder.AllowMultipleEncoding"); |
| 517 | + for(UriSegment seg: set){ |
| 518 | + String value = canonicalize(parseMap.get(seg), allowMultiple, allowMixed); |
| 519 | + value = value == null ? "" : value; |
| 520 | + //In the case of a uri query, we need to break up and canonicalize the internal parts of the query. |
| 521 | + if(seg == UriSegment.QUERY && null != parseMap.get(seg)){ |
| 522 | + StringBuilder qBuilder = new StringBuilder(); |
| 523 | + try { |
| 524 | + Map<String, List<String>> canonicalizedMap = this.splitQuery(dirtyUri); |
| 525 | + Set<Entry<String, List<String>>> query = canonicalizedMap.entrySet(); |
| 526 | + Iterator<Entry<String, List<String>>> i = query.iterator(); |
| 527 | + while(i.hasNext()){ |
| 528 | + Entry<String, List<String>> e = i.next(); |
| 529 | + String key = (String) e.getKey(); |
| 530 | + String qVal = ""; |
| 531 | + List<String> list = (List<String>) e.getValue(); |
| 532 | + if(!list.isEmpty()){ |
| 533 | + qVal = list.get(0); |
| 534 | + } |
| 535 | + qBuilder.append(key) |
| 536 | + .append("=") |
| 537 | + .append(qVal); |
| 538 | + |
| 539 | + if(i.hasNext()){ |
| 540 | + qBuilder.append("&"); |
| 541 | + } |
| 542 | + } |
| 543 | + value = qBuilder.toString(); |
| 544 | + } catch (UnsupportedEncodingException e) { |
| 545 | + logger.debug(Logger.EVENT_FAILURE, "decoding error when parsing [" + dirtyUri.toString() + "]"); |
| 546 | + } |
| 547 | + } |
| 548 | + //Check if the port is -1, if it is, omit it from the output. |
| 549 | + if(seg == UriSegment.PORT){ |
| 550 | + if("-1" == parseMap.get(seg)){ |
| 551 | + value = ""; |
| 552 | + } |
| 553 | + } |
| 554 | + parseMap.put(seg, value ); |
| 555 | + } |
| 556 | + |
| 557 | + return buildUrl(parseMap); |
| 558 | + } |
| 559 | + |
| 560 | + /** |
| 561 | + * All the parts should be canonicalized by this point. This is straightforward assembly. |
| 562 | + * |
| 563 | + * @param set |
| 564 | + * @return |
| 565 | + */ |
| 566 | + protected String buildUrl(Map<UriSegment, String> parseMap){ |
| 567 | + StringBuilder sb = new StringBuilder(); |
| 568 | + sb.append(parseMap.get(UriSegment.SCHEME)) |
| 569 | + .append("://") |
| 570 | + //can't use SCHEMESPECIFICPART for this, because we need to canonicalize all the parts of the query. |
| 571 | + //USERINFO is also deprecated. So we technically have more than we need. |
| 572 | + .append(parseMap.get(UriSegment.AUTHORITY) == null || parseMap.get(UriSegment.AUTHORITY).equals("") ? "" : parseMap.get(UriSegment.AUTHORITY)) |
| 573 | + .append(parseMap.get(UriSegment.PATH) == null || parseMap.get(UriSegment.PATH).equals("") ? "" : parseMap.get(UriSegment.PATH)) |
| 574 | + .append(parseMap.get(UriSegment.QUERY) == null || parseMap.get(UriSegment.QUERY).equals("") |
| 575 | + ? "" : "?" + parseMap.get(UriSegment.QUERY)) |
| 576 | + .append((parseMap.get(UriSegment.FRAGMENT) == null) || parseMap.get(UriSegment.FRAGMENT).equals("") |
| 577 | + ? "": "#" + parseMap.get(UriSegment.FRAGMENT)) |
| 578 | + ; |
| 579 | + return sb.toString(); |
| 580 | + } |
| 581 | + |
/**
 * Names the pieces of a URI that are stored separately in the segment map used by
 * {@code getCanonicalizedURI} and {@code buildUrl}. The declaration order is kept exactly
 * as it was, since it determines the constants' ordinals.
 */
public enum UriSegment {
    /** The raw authority component. */
    AUTHORITY,
    /** The scheme component. */
    SCHEME,
    /** The raw scheme-specific part. */
    SCHEMSPECIFICPART,
    /** The user-information component; not populated by getCanonicalizedURI. */
    USERINFO,
    /** The host component. */
    HOST,
    /** The port, stored as text (empty when undefined). */
    PORT,
    /** The raw path component. */
    PATH,
    /** The raw query component. */
    QUERY,
    /** The raw fragment component. */
    FRAGMENT
}
| 585 | + |
| 586 | + |
| 587 | + /** |
| 588 | + * The meat of this method was taken from StackOverflow: http://stackoverflow.com/a/13592567/557153 |
| 589 | + * It has been modified to return a canonicalized key and value pairing. |
| 590 | + * |
| 591 | + * @param java URI |
| 592 | + * @return a map of canonicalized query parameters. |
| 593 | + * @throws UnsupportedEncodingException |
| 594 | + */ |
| 595 | + public Map<String, List<String>> splitQuery(URI uri) throws UnsupportedEncodingException { |
| 596 | + final Map<String, List<String>> query_pairs = new LinkedHashMap<String, List<String>>(); |
| 597 | + final String[] pairs = uri.getQuery().split("&"); |
| 598 | + for (String pair : pairs) { |
| 599 | + final int idx = pair.indexOf("="); |
| 600 | + final String key = idx > 0 ? canonicalize(pair.substring(0, idx)) : pair; |
| 601 | + if (!query_pairs.containsKey(key)) { |
| 602 | + query_pairs.put(key, new LinkedList<String>()); |
| 603 | + } |
| 604 | + final String value = idx > 0 && pair.length() > idx + 1 ? URLDecoder.decode(pair.substring(idx + 1), "UTF-8") : null; |
| 605 | + query_pairs.get(key).add(canonicalize(value)); |
| 606 | + } |
| 607 | + return query_pairs; |
| 608 | + } |
455 | 609 | }
|
0 commit comments