Drop superseded encoding-checking code

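This change drops code that performs encoding checks which no longer correspond to any current requirement in the Encoding spec: the IANA-registration and preferred-name errors (Charmod C022, C023, and C024) and the “should not use”, EBCDIC, and “not widely supported” warnings, along with the supporting lookup tables, fields, and accessor methods in Encoding.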
validator · sideshowbarker · Sep 12, 2020 · Sep 12, 2020 · Sep 12, 2020 · Sep 12, 2020
commit 24538913bedd0259cfa19075160dadc822bb6d1c
diff --git a/src/nu/validator/htmlparser/io/Driver.java b/src/nu/validator/htmlparser/io/Driver.java
@@ -470,33 +470,6 @@ protected Encoding encodingFromExternalDeclaration(String encoding)
     protected Encoding whineAboutEncodingAndReturnActual(String encoding,
             Encoding cs) throws SAXException {
         String canonName = cs.getCanonName();
-        if (!cs.isRegistered()) {
-            if (encoding.startsWith("x-")) {
-                tokenizer.err("The encoding \u201C"
-                        + encoding
-                        + "\u201D is not an IANA-registered encoding. (Charmod C022)");
-            } else {
-                tokenizer.err("The encoding \u201C"
-                        + encoding
-                        + "\u201D is not an IANA-registered encoding and did not use the \u201Cx-\u201D prefix. (Charmod C023)");
-            }
-        } else if (!canonName.equals(encoding)) {
-            tokenizer.err("The encoding \u201C"
-                    + encoding
-                    + "\u201D is not the preferred name of the character encoding in use. The preferred name is \u201C"
-                    + canonName + "\u201D. (Charmod C024)");
-        }
-        if (cs.isShouldNot()) {
-            tokenizer.warn("Authors should not use the character encoding \u201C"
-                    + encoding
-                    + "\u201D. It is recommended to use \u201CUTF-8\u201D.");
-        } else if (cs.isLikelyEbcdic()) {
-            tokenizer.warn("Authors should not use EBCDIC-based encodings. It is recommended to use \u201CUTF-8\u201D.");
-        } else if (cs.isObscure()) {
-            tokenizer.warn("The character encoding \u201C"
-                    + encoding
-                    + "\u201D is not widely supported. Better interoperability may be achieved by using \u201CUTF-8\u201D.");
-        }
         if (!canonName.equals(encoding)) {
             tokenizer.err(Encoding.msgNotPreferredName(encoding, canonName));
         }

diff --git a/src/nu/validator/htmlparser/io/Encoding.java b/src/nu/validator/htmlparser/io/Encoding.java
@@ -52,17 +52,6 @@ public class Encoding {
 
     public static final Encoding WINDOWS1252;
 
-    private static String[] SHOULD_NOT = { "jisx02121990", "xjis0208" };
-
-    private static String[] BANNED = { "bocu1", "cesu8", "compoundtext",
-            "iscii91", "macarabic", "maccentraleurroman", "maccroatian",
-            "maccyrillic", "macdevanagari", "macfarsi", "macgreek",
-            "macgujarati", "macgurmukhi", "machebrew", "macicelandic",
-            "macroman", "macromanian", "macthai", "macturkish", "macukranian",
-            "scsu", "utf32", "utf32be", "utf32le", "utf7", "ximapmailboxname",
-            "xjisautodetect", "xutf16bebom", "xutf16lebom", "xutf32bebom",
-            "xutf32lebom", "xutf16oppositeendian", "xutf16platformendian",
-            "xutf32oppositeendian", "xutf32platformendian" };
     private static Map<String, Encoding> encodingByLabel =
         new HashMap<String, Encoding>();
 
@@ -304,12 +293,6 @@ public class Encoding {
 
     private final Charset charset;
 
-    private final boolean obscure;
-
-    private final boolean shouldNot;
-
-    private final boolean likelyEbcdic;
-
     static {
         Set<Encoding> encodings = new HashSet<Encoding>();
 
@@ -345,30 +328,6 @@ asciiSuperset, isObscure(name),
         WINDOWS1252 = forName("windows-1252");
     }
 
-    private static boolean isObscure(String lowerCasePreferredIanaName) {
-        return !(Arrays.binarySearch(NOT_OBSCURE, lowerCasePreferredIanaName) > -1);
-    }
-
-    private static boolean isBanned(String lowerCasePreferredIanaName) {
-        if (lowerCasePreferredIanaName.startsWith("xibm")) {
-            return true;
-        }
-        return (Arrays.binarySearch(BANNED, lowerCasePreferredIanaName) > -1);
-    }
-
-    private static boolean isShouldNot(String lowerCasePreferredIanaName) {
-        return (Arrays.binarySearch(SHOULD_NOT, lowerCasePreferredIanaName) > -1);
-    }
-
-    private static boolean isLikelyEbcdic(String canonName,
-            boolean asciiSuperset) {
-        if (!asciiSuperset) {
-            return (canonName.startsWith("cp") || canonName.startsWith("ibm") || canonName.startsWith("xibm"));
-        } else {
-            return false;
-        }
-    }
-
     public static Encoding forName(String name) {
         Encoding rv = encodingByLabel.get(toNameKey(name));
         if (rv == null) {
@@ -454,37 +413,6 @@ public String getCanonName() {
         return canonName;
     }
 
-    /**
-     * Returns the likelyEbcdic.
-     * 
-     * @return the likelyEbcdic
-     */
-    public boolean isLikelyEbcdic() {
-        return likelyEbcdic;
-    }
-
-    /**
-     * Returns the obscure.
-     * 
-     * @return the obscure
-     */
-    public boolean isObscure() {
-        return obscure;
-    }
-
-    /**
-     * Returns the shouldNot.
-     * 
-     * @return the shouldNot
-     */
-    public boolean isShouldNot() {
-        return shouldNot;
-    }
-
-    public boolean isRegistered() {
-        return !canonName.startsWith("x-");
-    }
-
     /**
      * @return
      * @see java.nio.charset.Charset#canEncode()

diff --git a/src/nu/validator/htmlparser/io/MetaSniffer.java b/src/nu/validator/htmlparser/io/MetaSniffer.java
@@ -169,28 +169,6 @@ protected boolean tryCharset(String encoding) throws SAXException {
             } else {
                 Encoding cs = Encoding.forName(encoding);
                 String canonName = cs.getCanonName();
-                if (!cs.isRegistered()) {
-                    if (encoding.startsWith("x-")) {
-                        err("The encoding \u201C"
-                                + encoding
-                                + "\u201D is not an IANA-registered encoding. (Charmod C022)");                    
-                    } else {
-                        err("The encoding \u201C"
-                                + encoding
-                                + "\u201D is not an IANA-registered encoding and did not use the \u201Cx-\u201D prefix. (Charmod C023)");
-                    }
-                } else if (!cs.getCanonName().equals(encoding)) {
-                    err("The encoding \u201C" + encoding
-                            + "\u201D is not the preferred name of the character encoding in use. The preferred name is \u201C"
-                            + canonName + "\u201D. (Charmod C024)");
-                }
-                if (cs.isShouldNot()) {
-                    warn("Authors should not use the character encoding \u201C"
-                            + encoding
-                            + "\u201D. It is recommended to use \u201CUTF-8\u201D.");                
-                } else if (cs.isObscure()) {
-                    warn("The character encoding \u201C" + encoding + "\u201D is not widely supported. Better interoperability may be achieved by using \u201CUTF-8\u201D.");
-                }
                 if (!cs.getCanonName().equals(encoding)) {
                     err(Encoding.msgNotCanonicalName(encoding, canonName));
                     this.characterEncoding = cs;