Use a proc-macro to optimize match_ignore_ascii_case

Previously, each arm of the generated `match` expanded to an `eq_ignore_ascii_case` call, leading to code bloat and probably some slowness. Now, a proc-macro computes the length of the longest expected string at compile time; we lowercase the input once into a stack-allocated buffer of that size, then match against exact strings. Hopefully, the optimizer can turn this into a static table and a loop.
servo · SimonSapin · Feb 25, 2017 · Feb 24, 2017 · Feb 24, 2017 · Feb 24, 2017
commit 808922abbc7b410f76804e47a5f485eef38d7fa9
diff --git a/Cargo.toml b/Cargo.toml
@@ -23,6 +23,7 @@ tempdir = "0.3"
 encoding_rs = "0.5"
 
 [dependencies]
+cssparser-macros = {path = "./macros", version = "0.1"}
 heapsize = {version = "0.3", optional = true}
 matches = "0.1"
 serde = {version = "0.9", optional = true}
@@ -34,3 +35,6 @@ quote = "0.3"
 [features]
 bench = []
 dummy_match_byte = []
+
+[workspace]
+members = [".", "./macros"]
diff --git a/macros/Cargo.toml b/macros/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "cssparser-macros"
+version = "0.1.0"
+authors = ["Simon Sapin <[email protected]>"]
+description = "Procedural macros for cssparser"
+documentation = "https://docs.rs/cssparser-macros/"
+repository = "https://github.com/servo/rust-cssparser"
+license = "MPL-2.0"
+
+[lib]
+path = "lib.rs"
+proc-macro = true
+
+[dependencies]
+syn = "0.11"
+quote = "0.3"
diff --git a/macros/lib.rs b/macros/lib.rs
@@ -0,0 +1,47 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+extern crate proc_macro;
+#[macro_use] extern crate quote;
+extern crate syn;
+
+use std::ascii::AsciiExt;
+
+/// Derive-macro entry point backing `match_ignore_ascii_case!`.
+///
+/// The annotated item must carry a
+/// `#[cssparser__match_ignore_ascii_case__data(string = "…", …)]` attribute listing
+/// every string the `match` accepts. The expansion is a single item,
+/// `const MAX_LENGTH: usize = N;`, where `N` is the byte length of the longest
+/// listed string.
+///
+/// # Panics
+///
+/// Panics when the input cannot be parsed, when the data attribute is missing or
+/// lists no strings, when a parameter is not of the form `string = "…"`, or when
+/// a listed string is not already in ASCII lowercase.
+#[proc_macro_derive(cssparser__match_ignore_ascii_case__derive,
+                    attributes(cssparser__match_ignore_ascii_case__data))]
+pub fn expand_token_stream(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
+    let input = syn::parse_macro_input(&input.to_string()).unwrap();
+
+    // Find the data attribute by name rather than by position, so that other
+    // attributes on the item (or no attributes at all) produce a clear error
+    // instead of an index-out-of-bounds panic or a misleading mismatch.
+    let nested = input.attrs.iter().filter_map(|attr| match attr.value {
+        syn::MetaItem::List(ref ident, ref nested)
+        if ident == "cssparser__match_ignore_ascii_case__data" => Some(nested),
+        _ => None,
+    }).next().expect("expected a cssparser__match_ignore_ascii_case__data attribute");
+
+    let max_length = nested.iter().map(|sub_attr| match *sub_attr {
+        syn::NestedMetaItem::MetaItem(
+            syn::MetaItem::NameValue(ref ident, syn::Lit::Str(ref string, _))
+        )
+        if ident == "string" => {
+            // The generated `match` compares against these literals after the
+            // input has been lowercased, so an uppercase letter here could
+            // never match.
+            assert_eq!(*string, string.to_ascii_lowercase(),
+                       "the expected strings must be given in ASCII lowercase");
+            string.len()
+        }
+        _ => {
+            panic!("expected a `string = \"…\"` parameter to the attribute, got {:?}", sub_attr)
+        }
+    }).max().expect("expected at least one string");
+
+    let tokens = quote! {
+        const MAX_LENGTH: usize = #max_length;
+    };
+
+    tokens.as_str().parse().unwrap()
+}
diff --git a/src/lib.rs b/src/lib.rs
@@ -68,6 +68,7 @@ fn parse_border_spacing(_context: &ParserContext, input: &mut Parser)
 
 #![recursion_limit="200"]  // For color::parse_color_keyword
 
+#[macro_use] extern crate cssparser_macros;
 #[macro_use] extern crate matches;
 #[cfg(test)] extern crate encoding_rs;
 #[cfg(test)] extern crate tempdir;
@@ -123,10 +124,16 @@ macro_rules! match_ignore_ascii_case {
     // finished parsing
     (@inner $value:expr, () -> ($(($string:expr => $result:expr))*) $fallback:expr ) => {
         {
-            use std::ascii::AsciiExt;
-            match &$value[..] {
+            #[derive(cssparser__match_ignore_ascii_case__derive)]
+            #[cssparser__match_ignore_ascii_case__data($(string = $string),+)]
+            #[allow(dead_code)]
+            struct Dummy;
+
+            // MAX_LENGTH is generated by the cssparser__match_ignore_ascii_case__derive proc-macro
+            let mut buffer: [u8; MAX_LENGTH] = unsafe { ::std::mem::uninitialized() };
+            match $crate::_match_ignore_ascii_case__to_lowercase(&mut buffer, &$value[..]) {
                 $(
-                    s if s.eq_ignore_ascii_case($string) => $result,
+                    Some($string) => $result,
                 )+
                 _ => $fallback
             }
@@ -139,6 +146,28 @@ macro_rules! match_ignore_ascii_case {
     };
 }
 
+/// Support routine for the `match_ignore_ascii_case!` macro; not meant to be called directly.
+///
+/// Returns an ASCII-lowercased view of `input`, using `buffer` as scratch space:
+///
+/// * `None` when `input` has more bytes than `buffer` can hold,
+/// * `input` itself when it contains no ASCII uppercase letter,
+/// * otherwise a lowercased copy written to the front of `buffer`.
+#[doc(hidden)]
+#[allow(non_snake_case)]
+pub fn _match_ignore_ascii_case__to_lowercase<'a>(buffer: &'a mut [u8], input: &'a str) -> Option<&'a str> {
+    let scratch = match buffer.get_mut(..input.len()) {
+        Some(scratch) => scratch,
+        // The buffer is as long as the longest expected string, so an input
+        // that does not fit cannot equal any of the expected strings.
+        None => return None,
+    };
+
+    let uppercase_at = match input.bytes().position(|byte| matches!(byte, b'A'...b'Z')) {
+        Some(index) => index,
+        // Nothing to convert: the input can be returned as it is, without copying.
+        None => return Some(input),
+    };
+
+    scratch.copy_from_slice(input.as_bytes());
+    // Every byte before `uppercase_at` was already checked, so only the tail needs converting.
+    std::ascii::AsciiExt::make_ascii_lowercase(&mut scratch[uppercase_at..]);
+
+    // `scratch` holds the bytes of a valid `str` in which only ASCII uppercase
+    // letters were replaced by their ASCII lowercase counterparts, so it is
+    // still valid UTF-8.
+    unsafe {
+        Some(::std::str::from_utf8_unchecked(scratch))
+    }
+}
+
 mod rules_and_declarations;
 
 #[cfg(feature = "dummy_match_byte")]