
Hasten macro parsing #68848


Closed
Changes from 1 commit
Avoid instantiating many Parser structs in generic_extension.
Currently, every iteration of the main loop in `generic_extension`
instantiates a `Parser`, which is expensive because `Parser` is a large
type. Many of those instantiations are only used immutably, particularly
for simple-but-repetitive macros of the sort seen in `html5ever` and PR
68836.

This commit initializes a single parser outside the loop, and then uses
`Cow` to avoid cloning it except for the mutating iterations. This
speeds up `html5ever` runs by up to 15%.
nnethercote committed Feb 10, 2020
commit 67da45f5084f98eeb20cc6022d68788510dc832a
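The idea behind the change is the standard `Cow` lazy-clone idiom. A rough, self-contained sketch follows (the `Parser`, `try_rule`, and `main` below are hypothetical stand-ins, not the compiler's actual types or call sites): the base parser is built once outside the loop, each arm borrows it through a fresh `Cow::Borrowed`, and only an arm that actually mutates the parser pays for a clone via `to_mut`.

```rust
use std::borrow::Cow;

// Stand-in for the large, expensive-to-clone `Parser` struct.
#[derive(Clone)]
struct Parser {
    pos: usize,
    tokens: Vec<&'static str>,
}

impl Parser {
    fn current(&self) -> Option<&'static str> {
        self.tokens.get(self.pos).copied()
    }
    fn bump(&mut self) {
        self.pos += 1;
    }
}

// Try one "arm": peeks at the parser immutably, and only clones it
// (via `Cow::to_mut`) if the arm actually needs to advance.
fn try_rule(parser: &mut Cow<'_, Parser>, expected: &str) -> bool {
    match parser.current() {
        // Immutable peek: no clone happens on this path.
        Some(tok) if tok == expected => {
            // Mutation required: `to_mut` clones the borrowed base parser once.
            parser.to_mut().bump();
            true
        }
        _ => false,
    }
}

fn main() {
    // One base parser, created outside the loop.
    let base = Parser { pos: 0, tokens: vec!["c"] };

    // Every iteration starts from a fresh `Cow::Borrowed` of the same base,
    // so arms that fail without mutating never pay for a clone.
    for rule in ["a", "b", "c"] {
        let mut parser = Cow::Borrowed(&base);
        if try_rule(&mut parser, rule) {
            println!("matched arm {:?}", rule);
            break;
        }
    }
}
```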
38 changes: 9 additions & 29 deletions src/librustc_expand/mbe/macro_parser.rs
@@ -78,20 +78,19 @@ use crate::mbe::{self, TokenTree};

use rustc_ast_pretty::pprust;
use rustc_parse::parser::{FollowedByType, Parser, PathStyle};
use rustc_parse::Directory;
use rustc_session::parse::ParseSess;
use rustc_span::symbol::{kw, sym, Symbol};
use syntax::ast::{Ident, Name};
use syntax::ptr::P;
use syntax::token::{self, DocComment, Nonterminal, Token};
use syntax::tokenstream::TokenStream;

use rustc_errors::{FatalError, PResult};
use rustc_span::Span;
use smallvec::{smallvec, SmallVec};

use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sync::Lrc;
use std::borrow::Cow;
use std::collections::hash_map::Entry::{Occupied, Vacant};
use std::mem;
use std::ops::{Deref, DerefMut};
@@ -613,28 +612,9 @@ fn inner_parse_loop<'root, 'tt>(
Success(())
}

/// Use the given sequence of token trees (`ms`) as a matcher. Match the given token stream `tts`
/// against it and return the match.
///
/// # Parameters
///
/// - `sess`: The session into which errors are emitted
/// - `tts`: The tokenstream we are matching against the pattern `ms`
/// - `ms`: A sequence of token trees representing a pattern against which we are matching
/// - `directory`: Information about the file locations (needed for the black-box parser)
/// - `recurse_into_modules`: Whether or not to recurse into modules (needed for the black-box
/// parser)
pub(super) fn parse(
sess: &ParseSess,
tts: TokenStream,
ms: &[TokenTree],
directory: Option<Directory<'_>>,
recurse_into_modules: bool,
) -> NamedParseResult {
// Create a parser that can be used for the "black box" parts.
let mut parser =
Parser::new(sess, tts, directory, recurse_into_modules, true, rustc_parse::MACRO_ARGUMENTS);

/// Use the given sequence of token trees (`ms`) as a matcher. Match the token
/// stream from the given `parser` against it and return the match.
pub(super) fn parse_tt(parser: &mut Cow<'_, Parser<'_>>, ms: &[TokenTree]) -> NamedParseResult {
// A queue of possible matcher positions. We initialize it with the matcher position in which
// the "dot" is before the first token of the first token tree in `ms`. `inner_parse_loop` then
// processes all of these possible matcher positions and produces possible next positions into
@@ -659,7 +639,7 @@ pub(super) fn parse(
// parsing from the black-box parser done. The result is that `next_items` will contain a
// bunch of possible next matcher positions in `next_items`.
match inner_parse_loop(
sess,
parser.sess,
&mut cur_items,
&mut next_items,
&mut eof_items,
@@ -684,7 +664,7 @@ pub(super) fn parse(
if eof_items.len() == 1 {
let matches =
eof_items[0].matches.iter_mut().map(|dv| Lrc::make_mut(dv).pop().unwrap());
return nameize(sess, ms, matches);
return nameize(parser.sess, ms, matches);
} else if eof_items.len() > 1 {
return Error(
parser.token.span,
@@ -736,13 +716,13 @@ pub(super) fn parse(
// If there are no possible next positions AND we aren't waiting for the black-box parser,
// then there is a syntax error.
else if bb_items.is_empty() && next_items.is_empty() {
return Failure(parser.token.take(), "no rules expected this token in macro call");
return Failure(parser.token.clone(), "no rules expected this token in macro call");
}
// Dump all possible `next_items` into `cur_items` for the next iteration.
else if !next_items.is_empty() {
// Now process the next token
cur_items.extend(next_items.drain(..));
parser.bump();
parser.to_mut().bump();
}
// Finally, we have the case where we need to call the black-box parser to get some
// nonterminal.
@@ -754,7 +734,7 @@ pub(super) fn parse(
let match_cur = item.match_cur;
item.push_match(
match_cur,
MatchedNonterminal(Lrc::new(parse_nt(&mut parser, span, ident.name))),
MatchedNonterminal(Lrc::new(parse_nt(parser.to_mut(), span, ident.name))),
);
item.idx += 1;
item.match_cur += 1;
59 changes: 43 additions & 16 deletions src/librustc_expand/mbe/macro_rules.rs
@@ -1,11 +1,11 @@
use crate::base::{DummyResult, ExtCtxt, MacResult, TTMacroExpander};
use crate::base::{DummyResult, ExpansionData, ExtCtxt, MacResult, TTMacroExpander};
use crate::base::{SyntaxExtension, SyntaxExtensionKind};
use crate::expand::{ensure_complete_parse, parse_ast_fragment, AstFragment, AstFragmentKind};
use crate::mbe;
use crate::mbe::macro_check;
use crate::mbe::macro_parser::parse;
use crate::mbe::macro_parser::parse_tt;
use crate::mbe::macro_parser::{Error, Failure, Success};
use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq, NamedParseResult};
use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq};
use crate::mbe::transcribe::transcribe;

use rustc_ast_pretty::pprust;
@@ -166,9 +166,9 @@ impl TTMacroExpander for MacroRulesMacroExpander {
}
}

fn trace_macros_note(cx: &mut ExtCtxt<'_>, sp: Span, message: String) {
fn trace_macros_note(cx_expansions: &mut FxHashMap<Span, Vec<String>>, sp: Span, message: String) {
let sp = sp.macro_backtrace().last().map(|trace| trace.call_site).unwrap_or(sp);
cx.expansions.entry(sp).or_default().push(message);
cx_expansions.entry(sp).or_default().push(message);
}

/// Given `lhses` and `rhses`, this is the new macro we create
@@ -184,11 +184,33 @@ fn generic_extension<'cx>(
) -> Box<dyn MacResult + 'cx> {
if cx.trace_macros() {
let msg = format!("expanding `{}! {{ {} }}`", name, pprust::tts_to_string(arg.clone()));
trace_macros_note(cx, sp, msg);
trace_macros_note(&mut cx.expansions, sp, msg);
}

// Which arm's failure should we report? (the one furthest along)
let mut best_failure: Option<(Token, &str)> = None;

// We create a base parser that can be used for the "black box" parts.
// Every iteration needs a fresh copy of that parser. However, the parser
// is not mutated on many of the iterations, particularly when dealing with
// macros like this:
//
// macro_rules! foo {
// ("a") => (A);
// ("b") => (B);
// ("c") => (C);
// // ... etc. (maybe hundreds more)
// }
//
// as seen in the `html5ever` benchmark. We use a `Cow` so that the base
// parser is only cloned when necessary (upon mutation). Furthermore, we
// reinitialize the `Cow` with the base parser at the start of every
// iteration, so that any mutated parsers are not reused. This is all quite
// hacky, but speeds up the `html5ever` benchmark significantly. (Issue
// 68836 suggests a more comprehensive but more complex change to deal with
// this situation.)
let parser = parser_from_cx(&cx.current_expansion, &cx.parse_sess, arg.clone());

for (i, lhs) in lhses.iter().enumerate() {
// try each arm's matchers
let lhs_tt = match *lhs {
@@ -202,7 +224,7 @@ fn generic_extension<'cx>(
// are not recorded. On the first `Success(..)`ful matcher, the spans are merged.
let mut gated_spans_snaphot = mem::take(&mut *cx.parse_sess.gated_spans.spans.borrow_mut());

match parse_tt(cx, lhs_tt, arg.clone()) {
match parse_tt(&mut Cow::Borrowed(&parser), lhs_tt) {
Success(named_matches) => {
// The matcher was `Success(..)`ful.
// Merge the gated spans from parsing the matcher with the pre-existing ones.
@@ -232,7 +254,7 @@ fn generic_extension<'cx>(

if cx.trace_macros() {
let msg = format!("to `{}`", pprust::tts_to_string(tts.clone()));
trace_macros_note(cx, sp, msg);
trace_macros_note(&mut cx.expansions, sp, msg);
}

let directory = Directory {
@@ -269,6 +291,7 @@ fn generic_extension<'cx>(
// Restore to the state before snapshotting and maybe try again.
mem::swap(&mut gated_spans_snaphot, &mut cx.parse_sess.gated_spans.spans.borrow_mut());
}
drop(parser);

let (token, label) = best_failure.expect("ran no matchers");
let span = token.span.substitute_dummy(sp);
@@ -286,7 +309,8 @@ fn generic_extension<'cx>(
mbe::TokenTree::Delimited(_, ref delim) => &delim.tts[..],
_ => continue,
};
match parse_tt(cx, lhs_tt, arg.clone()) {
let parser = parser_from_cx(&cx.current_expansion, &cx.parse_sess, arg.clone());
match parse_tt(&mut Cow::Borrowed(&parser), lhs_tt) {
Success(_) => {
if comma_span.is_dummy() {
err.note("you might be missing a comma");
@@ -368,7 +392,8 @@ pub fn compile_declarative_macro(
),
];

let argument_map = match parse(sess, body, &argument_gram, None, true) {
let parser = Parser::new(sess, body, None, true, true, rustc_parse::MACRO_ARGUMENTS);
let argument_map = match parse_tt(&mut Cow::Borrowed(&parser), &argument_gram) {
Success(m) => m,
Failure(token, msg) => {
let s = parse_failure_msg(&token);
@@ -1184,14 +1209,16 @@ fn quoted_tt_to_string(tt: &mbe::TokenTree) -> String {
}
}

/// Use this token tree as a matcher to parse given tts.
fn parse_tt(cx: &ExtCtxt<'_>, mtch: &[mbe::TokenTree], tts: TokenStream) -> NamedParseResult {
// `None` is because we're not interpolating
fn parser_from_cx<'cx>(
current_expansion: &'cx ExpansionData,
sess: &'cx ParseSess,
tts: TokenStream,
) -> Parser<'cx> {
let directory = Directory {
path: Cow::from(cx.current_expansion.module.directory.as_path()),
ownership: cx.current_expansion.directory_ownership,
path: Cow::from(current_expansion.module.directory.as_path()),
ownership: current_expansion.directory_ownership,
};
parse(cx.parse_sess(), tts, mtch, Some(directory), true)
Parser::new(sess, tts, Some(directory), true, true, rustc_parse::MACRO_ARGUMENTS)
}

/// Generates an appropriate parsing failure message. For EOF, this is "unexpected end...". For