Skip to content

Commit 694b9ac

Browse files
lpw25stedolan
authored andcommitted
Add syntax to the parser for local allocations (#26)
1 parent f183008 commit 694b9ac

36 files changed

+8903
-5992
lines changed

boot/menhir/menhirLib.ml

Lines changed: 332 additions & 53 deletions
Large diffs are not rendered by default.

boot/menhir/menhirLib.mli

Lines changed: 115 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -222,9 +222,9 @@ module type INCREMENTAL_ENGINE = sig
222222
| Rejected
223223

224224
(* [offer] allows the user to resume the parser after it has suspended
225-
itself with a checkpoint of the form [InputNeeded env]. [offer] expects the
226-
old checkpoint as well as a new token and produces a new checkpoint. It does not
227-
raise any exception. *)
225+
itself with a checkpoint of the form [InputNeeded env]. [offer] expects
226+
the old checkpoint as well as a new token and produces a new checkpoint.
227+
It does not raise any exception. *)
228228

229229
val offer:
230230
'a checkpoint ->
@@ -233,10 +233,30 @@ module type INCREMENTAL_ENGINE = sig
233233

234234
(* [resume] allows the user to resume the parser after it has suspended
235235
itself with a checkpoint of the form [AboutToReduce (env, prod)] or
236-
[HandlingError env]. [resume] expects the old checkpoint and produces a new
237-
checkpoint. It does not raise any exception. *)
236+
[HandlingError env]. [resume] expects the old checkpoint and produces a
237+
new checkpoint. It does not raise any exception. *)
238+
239+
(* The optional argument [strategy] influences the manner in which [resume]
240+
deals with checkpoints of the form [HandlingError _]. Its default value
241+
is [`Legacy]. It can be briefly described as follows:
242+
243+
- If the [error] token is used only to report errors (that is, if the
244+
[error] token appears only at the end of a production, whose semantic
245+
action raises an exception) then the simplified strategy should be
246+
preferred. (This includes the case where the [error] token does not
247+
appear at all in the grammar.)
248+
249+
- If the [error] token is used to recover after an error, or if
250+
perfect backward compatibility is required, the legacy strategy
251+
should be selected.
252+
253+
More details on these strategies appear in the file [Engine.ml]. *)
254+
255+
type strategy =
256+
[ `Legacy | `Simplified ]
238257

239258
val resume:
259+
?strategy:strategy ->
240260
'a checkpoint ->
241261
'a checkpoint
242262

@@ -246,7 +266,8 @@ module type INCREMENTAL_ENGINE = sig
246266
type supplier =
247267
unit -> token * position * position
248268

249-
(* A pair of a lexer and a lexing buffer can be easily turned into a supplier. *)
269+
(* A pair of a lexer and a lexing buffer can be easily turned into a
270+
supplier. *)
250271

251272
val lexer_lexbuf_to_supplier:
252273
(Lexing.lexbuf -> token) ->
@@ -261,9 +282,11 @@ module type INCREMENTAL_ENGINE = sig
261282
(* [loop supplier checkpoint] begins parsing from [checkpoint], reading
262283
tokens from [supplier]. It continues parsing until it reaches a
263284
checkpoint of the form [Accepted v] or [Rejected]. In the former case, it
264-
returns [v]. In the latter case, it raises the exception [Error]. *)
285+
returns [v]. In the latter case, it raises the exception [Error].
286+
The optional argument [strategy], whose default value is [`Legacy],
287+
is passed to [resume] and influences the error-handling strategy. *)
265288

266-
val loop: supplier -> 'a checkpoint -> 'a
289+
val loop: ?strategy:strategy -> supplier -> 'a checkpoint -> 'a
267290

268291
(* [loop_handle succeed fail supplier checkpoint] begins parsing from
269292
[checkpoint], reading tokens from [supplier]. It continues parsing until
@@ -272,10 +295,10 @@ module type INCREMENTAL_ENGINE = sig
272295
observed first). In the former case, it calls [succeed v]. In the latter
273296
case, it calls [fail] with this checkpoint. It cannot raise [Error].
274297
275-
This means that Menhir's traditional error-handling procedure (which pops
276-
the stack until a state that can act on the [error] token is found) does
277-
not get a chance to run. Instead, the user can implement her own error
278-
handling code, in the [fail] continuation. *)
298+
This means that Menhir's error-handling procedure does not get a chance
299+
to run. For this reason, there is no [strategy] parameter. Instead, the
300+
user can implement her own error handling code, in the [fail]
301+
continuation. *)
279302

280303
val loop_handle:
281304
('a -> 'answer) ->
@@ -943,6 +966,7 @@ module type MONOLITHIC_ENGINE = sig
943966
exception Error
944967

945968
val entry:
969+
(* strategy: *) [ `Legacy | `Simplified ] -> (* see [IncrementalEngine] *)
946970
state ->
947971
(Lexing.lexbuf -> token) ->
948972
Lexing.lexbuf ->
@@ -1066,12 +1090,20 @@ type 'a buffer
10661090
which internally relies on [lexer] and updates [buffer] on the fly whenever
10671091
a token is demanded. *)
10681092

1093+
(* The type of the buffer is [(position * position) buffer], which means that
1094+
it stores two pairs of positions, which are the start and end positions of
1095+
the last two tokens. *)
1096+
10691097
open Lexing
10701098

10711099
val wrap:
10721100
(lexbuf -> 'token) ->
10731101
(position * position) buffer * (lexbuf -> 'token)
10741102

1103+
val wrap_supplier:
1104+
(unit -> 'token * position * position) ->
1105+
(position * position) buffer * (unit -> 'token * position * position)
1106+
10751107
(* [show f buffer] prints the contents of the buffer, producing a string that
10761108
is typically of the form "after '%s' and before '%s'". The function [f] is
10771109
used to print an element. The buffer MUST be nonempty. *)
@@ -1084,6 +1116,76 @@ val show: ('a -> string) -> 'a buffer -> string
10841116
val last: 'a buffer -> 'a
10851117

10861118
(* -------------------------------------------------------------------------- *)
1119+
1120+
(* [extract text (pos1, pos2)] extracts the sub-string of [text] delimited
1121+
by the positions [pos1] and [pos2]. *)
1122+
1123+
val extract: string -> position * position -> string
1124+
1125+
(* [sanitize text] eliminates any special characters from the text [text].
1126+
A special character is a character whose ASCII code is less than 32.
1127+
Every special character is replaced with a single space character. *)
1128+
1129+
val sanitize: string -> string
1130+
1131+
(* [compress text] replaces every run of at least one whitespace character
1132+
with exactly one space character. *)
1133+
1134+
val compress: string -> string
1135+
1136+
(* [shorten k text] limits the length of [text] to [2k+3] characters. If the
1137+
text is too long, a fragment in the middle is replaced with an ellipsis. *)
1138+
1139+
val shorten: int -> string -> string
1140+
1141+
(* [expand f text] searches [text] for occurrences of [$k], where [k]
1142+
is a nonnegative integer literal, and replaces each such occurrence
1143+
with the string [f k]. *)
1144+
1145+
val expand: (int -> string) -> string -> string
1146+
end
1147+
module LexerUtil : sig
1148+
(******************************************************************************)
1149+
(* *)
1150+
(* Menhir *)
1151+
(* *)
1152+
(* François Pottier, Inria Paris *)
1153+
(* Yann Régis-Gianas, PPS, Université Paris Diderot *)
1154+
(* *)
1155+
(* Copyright Inria. All rights reserved. This file is distributed under the *)
1156+
(* terms of the GNU Library General Public License version 2, with a *)
1157+
(* special exception on linking, as described in the file LICENSE. *)
1158+
(* *)
1159+
(******************************************************************************)
1160+
1161+
open Lexing
1162+
1163+
(* [init filename lexbuf] initializes the lexing buffer [lexbuf] so
1164+
that the positions that are subsequently read from it refer to the
1165+
file [filename]. It returns [lexbuf]. *)
1166+
1167+
val init: string -> lexbuf -> lexbuf
1168+
1169+
(* [read filename] reads the entire contents of the file [filename] and
1170+
returns a pair of this content (a string) and a lexing buffer that
1171+
has been initialized, based on this string. *)
1172+
1173+
val read: string -> string * lexbuf
1174+
1175+
(* [newline lexbuf] increments the line counter stored within [lexbuf]. It
1176+
should be invoked by the lexer itself every time a newline character is
1177+
consumed. This allows maintaining a current line number in [lexbuf]. *)
1178+
1179+
val newline: lexbuf -> unit
1180+
1181+
(* [range (startpos, endpos)] produces a textual description of the range
1182+
delimited by the start and end positions [startpos] and [endpos].
1183+
This description is one line long and ends in a newline character.
1184+
This description mentions the file name, the line number, and a range
1185+
of characters on this line. The line number is correct only if [newline]
1186+
has been correctly used, as described above. *)
1187+
1188+
val range: position * position -> string
10871189
end
10881190
module Printers : sig
10891191
(******************************************************************************)
@@ -1701,5 +1803,5 @@ module MakeEngineTable
17011803
and type nonterminal = int
17021804
end
17031805
module StaticVersion : sig
1704-
val require_20190924 : unit
1806+
val require_20210419: unit
17051807
end

0 commit comments

Comments
 (0)