Skip to content

Commit 0a11b17

Browse files
committed
Fix URL encoding and decoding
The methods `uriEncode` and `uriDecode` did not properly handle percent-encoding. In particular, `uriEncode` did not properly output two uppercase hex digits, and `uriDecode` did not properly handle non-ASCII characters. Additionally, if no percent-encoding is performed, these methods now return the original string. Fixes #150 Closes #153 Fixes #154
1 parent 681b0aa commit 0a11b17

File tree

2 files changed

+113
-64
lines changed

2 files changed

+113
-64
lines changed

src/main/java/com/github/packageurl/PackageURL.java

Lines changed: 93 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,11 @@
2121
*/
2222
package com.github.packageurl;
2323

24+
import java.io.ByteArrayOutputStream;
2425
import java.io.Serializable;
2526
import java.net.URI;
2627
import java.net.URISyntaxException;
27-
import java.nio.charset.Charset;
28+
import java.nio.ByteBuffer;
2829
import java.nio.charset.StandardCharsets;
2930
import java.util.Arrays;
3031
import java.util.Collections;
@@ -33,6 +34,7 @@
3334
import java.util.TreeMap;
3435
import java.util.function.IntPredicate;
3536
import java.util.stream.Collectors;
37+
import java.util.stream.IntStream;
3638

3739
/**
3840
* <p>Package-URL (aka purl) is a "mostly universal" URL to describe a package. A purl is a URL composed of seven components:</p>
@@ -51,9 +53,10 @@
5153
* @since 1.0.0
5254
*/
5355
public final class PackageURL implements Serializable {
54-
5556
private static final long serialVersionUID = 3243226021636427586L;
5657

58+
private static final char PERCENT_CHAR = '%';
59+
5760
/**
5861
* Constructs a new PackageURL object by parsing the specified string.
5962
*
@@ -496,39 +499,14 @@ private String canonicalize(boolean coordinatesOnly) {
496499
return purl.toString();
497500
}
498501

499-
/**
500-
* Encodes the input in conformance with RFC 3986.
501-
*
502-
* @param input the String to encode
503-
* @return an encoded String
504-
*/
505-
private String percentEncode(final String input) {
506-
return uriEncode(input, StandardCharsets.UTF_8);
507-
}
508-
509-
private static String uriEncode(String source, Charset charset) {
510-
if (source == null || source.isEmpty()) {
511-
return source;
512-
}
513-
514-
StringBuilder builder = new StringBuilder();
515-
for (byte b : source.getBytes(charset)) {
516-
if (isUnreserved(b)) {
517-
builder.append((char) b);
518-
}
519-
else {
520-
// Substitution: A '%' followed by the hexadecimal representation of the ASCII value of the replaced character
521-
builder.append('%');
522-
builder.append(Integer.toHexString(b).toUpperCase());
523-
}
524-
}
525-
return builder.toString();
526-
}
527-
528502
private static boolean isUnreserved(int c) {
529503
return (isValidCharForKey(c) || c == '~');
530504
}
531505

506+
private static boolean shouldEncode(int c) {
507+
return !isUnreserved(c);
508+
}
509+
532510
private static boolean isAlpha(int c) {
533511
return (isLowerCase(c) || isUpperCase(c));
534512
}
@@ -584,42 +562,93 @@ private static String toLowerCase(String s) {
584562
return new String(chars);
585563
}
586564

587-
/**
588-
* Optionally decodes a String, if it's encoded. If String is not encoded,
589-
* method will return the original input value.
590-
*
591-
* @param input the value String to decode
592-
* @return a decoded String
593-
*/
594-
private String percentDecode(final String input) {
595-
if (input == null) {
596-
return null;
597-
}
598-
final String decoded = uriDecode(input);
599-
if (!decoded.equals(input)) {
600-
return decoded;
565+
private static String percentDecode(final String source) {
566+
if (source == null || source.isEmpty()) {
567+
return source;
601568
}
602-
return input;
603-
}
604569

605-
public static String uriDecode(String source) {
606-
if (source == null) {
570+
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
571+
int percentCharCount = getPercentCharCount(bytes);
572+
573+
if (percentCharCount == 0) {
607574
return source;
608575
}
609-
int length = source.length();
610-
StringBuilder builder = new StringBuilder();
576+
577+
int length = bytes.length;
578+
int capacity = (length + percentCharCount) - (percentCharCount * 3);
579+
580+
if (capacity <= 0) {
581+
throw new ValidationException("Invalid encoding in '" + source + "'");
582+
}
583+
584+
ByteBuffer buffer = ByteBuffer.allocate(capacity);
585+
611586
for (int i = 0; i < length; i++) {
612-
if (source.charAt(i) == '%') {
613-
String str = source.substring(i + 1, i + 3);
614-
char c = (char) Integer.parseInt(str, 16);
615-
builder.append(c);
616-
i += 2;
587+
if (buffer.position() + 1 > capacity) {
588+
throw new ValidationException("Invalid encoding in '" + source + "'");
589+
}
590+
591+
int b;
592+
593+
if (bytes[i] == PERCENT_CHAR) {
594+
int b1 = Character.digit(bytes[++i], 16);
595+
int b2 = Character.digit(bytes[++i], 16);
596+
b = (byte) ((b1 << 4) + b2);
597+
} else {
598+
b = bytes[i];
617599
}
618-
else {
619-
builder.append(source.charAt(i));
600+
601+
buffer.put((byte) b);
602+
}
603+
604+
return new String(buffer.array(), StandardCharsets.UTF_8);
605+
}
606+
607+
@Deprecated
608+
public String uriDecode(final String source) {
609+
return percentDecode(source);
610+
}
611+
612+
private static int getUnsafeCharCount(final byte[] bytes) {
613+
return (int) IntStream.range(0, bytes.length).map(i -> bytes[i]).filter(PackageURL::shouldEncode).count();
614+
}
615+
616+
private static boolean isPercent(int c) {
617+
return (c == PERCENT_CHAR);
618+
}
619+
620+
private static int getPercentCharCount(final byte[] bytes) {
621+
return (int) IntStream.range(0, bytes.length).map(i -> bytes[i]).filter(PackageURL::isPercent).count();
622+
}
623+
624+
private static String percentEncode(final String source) {
625+
if (source == null || source.isEmpty()) {
626+
return source;
627+
}
628+
629+
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
630+
int unsafeCharCount = getUnsafeCharCount(bytes);
631+
632+
if (unsafeCharCount == 0) {
633+
return source;
634+
}
635+
636+
int length = bytes.length;
637+
int capacity = (length - unsafeCharCount) + (3 * unsafeCharCount);
638+
ByteBuffer buffer = ByteBuffer.allocate(capacity);
639+
640+
for (byte b : bytes) {
641+
if (shouldEncode(b)) {
642+
byte b1 = (byte) Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
643+
byte b2 = (byte) Character.toUpperCase(Character.forDigit(b & 0xF, 16));
644+
byte[] encoded = {(byte) PERCENT_CHAR, b1, b2};
645+
buffer.put(encoded, 0, encoded.length);
646+
} else {
647+
buffer.put(b);
620648
}
621649
}
622-
return builder.toString();
650+
651+
return new String(buffer.array(), StandardCharsets.UTF_8);
623652
}
624653

625654
/**
@@ -696,9 +725,9 @@ private void parse(final String purl) throws MalformedPackageURLException {
696725
// The 'remainder' should now consist of an optional namespace and the name
697726
index = remainder.lastIndexOf('/');
698727
if (index <= start) {
699-
this.name = validateName(percentDecode(remainder.substring(start)));
728+
this.name = validateName(uriDecode(remainder.substring(start)));
700729
} else {
701-
this.name = validateName(percentDecode(remainder.substring(index + 1)));
730+
this.name = validateName(uriDecode(remainder.substring(index + 1)));
702731
remainder = remainder.substring(0, index);
703732
this.namespace = validateNamespace(parsePath(remainder.substring(start), false));
704733
}
@@ -749,7 +778,7 @@ private Map<String, String> parseQualifiers(final String encodedString) throws M
749778
final String[] entry = value.split("=", 2);
750779
if (entry.length == 2 && !entry[1].isEmpty()) {
751780
String key = toLowerCase(entry[0]);
752-
if (map.put(key, percentDecode(entry[1])) != null) {
781+
if (map.put(key, uriDecode(entry[1])) != null) {
753782
throw new ValidationException("Duplicate package qualifier encountered. More then one value was specified for " + key);
754783
}
755784
}
@@ -764,12 +793,12 @@ private Map<String, String> parseQualifiers(final String encodedString) throws M
764793
private String[] parsePath(final String path, final boolean isSubpath) {
765794
return Arrays.stream(path.split("/"))
766795
.filter(segment -> !segment.isEmpty() && !(isSubpath && (".".equals(segment) || "..".equals(segment))))
767-
.map(this::percentDecode)
796+
.map(PackageURL::percentDecode)
768797
.toArray(String[]::new);
769798
}
770799

771800
private String encodePath(final String path) {
772-
return Arrays.stream(path.split("/")).map(this::percentEncode).collect(Collectors.joining("/"));
801+
return Arrays.stream(path.split("/")).map(PackageURL::percentEncode).collect(Collectors.joining("/"));
773802
}
774803

775804
/**

src/test/java/com/github/packageurl/PackageURLTest.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,26 @@ public static void resetLocale() {
7070
Locale.setDefault(defaultLocale);
7171
}
7272

73+
@Test
74+
public void testValidPercentEncoding() throws MalformedPackageURLException {
75+
PackageURL purl = new PackageURL("maven", "com.google.summit", "summit-ast", "2.2.0\n", null, null);
76+
Assert.assertEquals("pkg:maven/com.google.summit/summit-ast@2.2.0%0A", purl.toString());
77+
PackageURL purl2 = new PackageURL("pkg:nuget/%D0%9Cicros%D0%BEft.%D0%95ntit%D1%83Fram%D0%B5work%D0%A1%D0%BEr%D0%B5");
78+
Assert.assertEquals("Мicrosоft.ЕntitуFramеworkСоrе", purl2.getName());
79+
Assert.assertEquals("pkg:nuget/%D0%9Cicros%D0%BEft.%D0%95ntit%D1%83Fram%D0%B5work%D0%A1%D0%BEr%D0%B5", purl2.toString());
80+
}
81+
82+
@SuppressWarnings("deprecation")
83+
@Test
84+
public void testInvalidPercentEncoding() throws MalformedPackageURLException {
85+
Assert.assertThrows(MalformedPackageURLException.class, () -> new PackageURL("pkg:maven/com.google.summit/summit-ast@2.2.0%"));
86+
Assert.assertThrows(MalformedPackageURLException.class, () -> new PackageURL("pkg:maven/com.google.summit/summit-ast@2.2.0%0"));
87+
PackageURL purl = new PackageURL("pkg:maven/com.google.summit/summit-ast@2.2.0");
88+
Assert.assertThrows(ValidationException.class, () -> purl.uriDecode("%"));
89+
Assert.assertThrows(ValidationException.class, () -> purl.uriDecode("%0"));
90+
Assert.assertThrows(ValidationException.class, () -> purl.uriDecode("aaaa%0%"));
91+
}
92+
7393
@Test
7494
public void testConstructorParsing() throws Exception {
7595
exception = ExpectedException.none();

0 commit comments

Comments
 (0)