Skip to content

Make match_ignore_ascii_case more efficient, add ascii_case_insensitive_phf_map #122

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Feb 25, 2017
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Use a proc-macro to optimize match_ignore_ascii_case
Previously, the compiler would emit many `eq_ignore_ascii_case` calls,
leading to code bloat and probably some slowness.

Now, we pre-lowercase the input in a stack-allocated buffer
then match exact strings.
Hopefully, the optimizer can turn this into a static table and a loop.
  • Loading branch information
SimonSapin committed Feb 24, 2017
commit 808922abbc7b410f76804e47a5f485eef38d7fa9
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ tempdir = "0.3"
encoding_rs = "0.5"

[dependencies]
cssparser-macros = {path = "./macros", version = "0.1"}
heapsize = {version = "0.3", optional = true}
matches = "0.1"
serde = {version = "0.9", optional = true}
Expand All @@ -34,3 +35,6 @@ quote = "0.3"
[features]
bench = []
dummy_match_byte = []

[workspace]
members = [".", "./macros"]
16 changes: 16 additions & 0 deletions macros/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
[package]
name = "cssparser-macros"
version = "0.1.0"
authors = ["Simon Sapin <[email protected]>"]
description = "Procedural macros for cssparser"
documentation = "https://docs.rs/cssparser-macros/"
repository = "https://github.com/servo/rust-cssparser"
license = "MPL-2.0"

[lib]
path = "lib.rs"
proc-macro = true

[dependencies]
syn = "0.11"
quote = "0.3"
47 changes: 47 additions & 0 deletions macros/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

extern crate proc_macro;
#[macro_use] extern crate quote;
extern crate syn;

use std::ascii::AsciiExt;

/// Derive entry point used by the `match_ignore_ascii_case!` macro.
///
/// Reads the `string = "…"` parameters of the
/// `#[cssparser__match_ignore_ascii_case__data(…)]` attribute on the derive input
/// and expands to a single item, `const MAX_LENGTH: usize`, whose value is the
/// byte length of the longest string given.
///
/// # Panics
///
/// Panics if the input cannot be parsed, if no
/// `cssparser__match_ignore_ascii_case__data(…)` list attribute is present, if a
/// parameter is not of the form `string = "…"`, if a string is not already in
/// ASCII lowercase, or if the attribute contains no string at all.
#[proc_macro_derive(cssparser__match_ignore_ascii_case__derive,
                    attributes(cssparser__match_ignore_ascii_case__data))]
pub fn expand_token_stream(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
    let input = syn::parse_macro_input(&input.to_string()).unwrap();

    // Find the data attribute by name rather than by position, so that a missing
    // attribute produces a descriptive message instead of an out-of-bounds panic
    // and the result does not depend on attribute order.
    let nested = input.attrs.iter().filter_map(|attr| match attr.value {
        syn::MetaItem::List(ref ident, ref nested)
        if ident == "cssparser__match_ignore_ascii_case__data" => Some(nested),
        _ => None,
    }).next().expect("expected a cssparser__match_ignore_ascii_case__data attribute");

    let lengths = nested.iter().map(|sub_attr| match *sub_attr {
        syn::NestedMetaItem::MetaItem(
            syn::MetaItem::NameValue(ref ident, syn::Lit::Str(ref string, _))
        )
        if ident == "string" => {
            // The macro compares against a lower-cased copy of its input, so an
            // expected string containing uppercase letters could never match.
            assert_eq!(*string, string.to_ascii_lowercase(),
                       "the expected strings must be given in ASCII lowercase");
            string.len()
        }
        _ => {
            panic!("expected a `string = \"…\"` parameter to the attribute, got {:?}", sub_attr)
        }
    });

    let max_length = lengths.max().expect("expected at least one string");

    let tokens = quote! {
        const MAX_LENGTH: usize = #max_length;
    };

    tokens.as_str().parse().unwrap()
}
35 changes: 32 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ fn parse_border_spacing(_context: &ParserContext, input: &mut Parser)

#![recursion_limit="200"] // For color::parse_color_keyword

#[macro_use] extern crate cssparser_macros;
#[macro_use] extern crate matches;
#[cfg(test)] extern crate encoding_rs;
#[cfg(test)] extern crate tempdir;
Expand Down Expand Up @@ -123,10 +124,16 @@ macro_rules! match_ignore_ascii_case {
// finished parsing
(@inner $value:expr, () -> ($(($string:expr => $result:expr))*) $fallback:expr ) => {
{
use std::ascii::AsciiExt;
match &$value[..] {
#[derive(cssparser__match_ignore_ascii_case__derive)]
#[cssparser__match_ignore_ascii_case__data($(string = $string),+)]
#[allow(dead_code)]
struct Dummy;

// MAX_LENGTH is generated by #[derive(cssparser__match_ignore_ascii_case__derive)]
let mut buffer: [u8; MAX_LENGTH] = unsafe { ::std::mem::uninitialized() };
match $crate::_match_ignore_ascii_case__to_lowercase(&mut buffer, &$value[..]) {
$(
s if s.eq_ignore_ascii_case($string) => $result,
Some($string) => $result,
)+
_ => $fallback
}
Expand All @@ -139,6 +146,28 @@ macro_rules! match_ignore_ascii_case {
};
}

/// Implementation detail of the match_ignore_ascii_case! macro.
///
/// Produces an ASCII-lowercase view of `input`:
///
/// * `None` when `input` is longer than `buffer`;
/// * `Some(input)` itself when it contains no ASCII uppercase letter
///   (`buffer` is left untouched);
/// * otherwise `Some` of a lower-cased copy written into the front of `buffer`.
///
/// Only the ASCII letters `A`–`Z` are changed; all other bytes are copied as-is.
#[doc(hidden)]
#[allow(non_snake_case)]
pub fn _match_ignore_ascii_case__to_lowercase<'a>(buffer: &'a mut [u8], input: &'a str) -> Option<&'a str> {
    // Input is longer than buffer, which has the length of the longest expected string:
    // none of the expected strings would match.
    let scratch = match buffer.get_mut(..input.len()) {
        Some(scratch) => scratch,
        None => return None,
    };

    let first_uppercase = match input.bytes().position(|byte| b'A' <= byte && byte <= b'Z') {
        Some(index) => index,
        // Input is already lower-case: hand it back without copying.
        None => return Some(input),
    };

    scratch.copy_from_slice(input.as_bytes());
    // Everything before `first_uppercase` is known to need no change.
    ::std::ascii::AsciiExt::make_ascii_lowercase(&mut scratch[first_uppercase..]);

    // SAFETY: `scratch` is a byte-for-byte copy of `input`, which is valid UTF-8,
    // and lower-casing only rewrites ASCII bytes to other ASCII bytes, so the
    // result is still valid UTF-8.
    unsafe {
        Some(::std::str::from_utf8_unchecked(scratch))
    }
}

mod rules_and_declarations;

#[cfg(feature = "dummy_match_byte")]
Expand Down