@@ -222,9 +222,9 @@ module type INCREMENTAL_ENGINE = sig
222
222
| Rejected
223
223
224
224
(* [offer] allows the user to resume the parser after it has suspended
225
- itself with a checkpoint of the form [InputNeeded env]. [offer] expects the
226
- old checkpoint as well as a new token and produces a new checkpoint. It does not
227
- raise any exception. *)
225
+ itself with a checkpoint of the form [InputNeeded env]. [offer] expects
226
+ the old checkpoint as well as a new token and produces a new checkpoint.
227
+ It does not raise any exception. *)
228
228
229
229
val offer :
230
230
'a checkpoint ->
@@ -233,10 +233,30 @@ module type INCREMENTAL_ENGINE = sig
233
233
234
234
(* [resume] allows the user to resume the parser after it has suspended
235
235
itself with a checkpoint of the form [AboutToReduce (env, prod)] or
236
- [HandlingError env]. [resume] expects the old checkpoint and produces a new
237
- checkpoint. It does not raise any exception. *)
236
+ [HandlingError env]. [resume] expects the old checkpoint and produces a
237
+ new checkpoint. It does not raise any exception. *)
238
+
239
+ (* The optional argument [strategy] influences the manner in which [resume]
240
+ deals with checkpoints of the form [HandlingError _]. Its default value
241
+ is [`Legacy]. The choice of a strategy can be summarized as follows:
242
+
243
+ - If the [error] token is used only to report errors (that is, if the
244
+ [error] token appears only at the end of a production, whose semantic
245
+ action raises an exception) then the simplified strategy should be
246
+ preferred. (This includes the case where the [error] token does not
247
+ appear at all in the grammar.)
248
+
249
+ - If the [error] token is used to recover after an error, or if
250
+ perfect backward compatibility is required, the legacy strategy
251
+ should be selected.
252
+
253
+ More details on these strategies appear in the file [Engine.ml]. *)
254
+
255
+ type strategy =
256
+ [ `Legacy | `Simplified ]
238
257
239
258
val resume :
259
+ ?strategy : strategy ->
240
260
'a checkpoint ->
241
261
'a checkpoint
242
262
@@ -246,7 +266,8 @@ module type INCREMENTAL_ENGINE = sig
246
266
type supplier =
247
267
unit -> token * position * position
248
268
249
- (* A pair of a lexer and a lexing buffer can be easily turned into a supplier. *)
269
+ (* A pair of a lexer and a lexing buffer can be easily turned into a
270
+ supplier. *)
250
271
251
272
val lexer_lexbuf_to_supplier :
252
273
(Lexing .lexbuf -> token ) ->
@@ -261,9 +282,11 @@ module type INCREMENTAL_ENGINE = sig
261
282
(* [loop supplier checkpoint] begins parsing from [checkpoint], reading
262
283
tokens from [supplier]. It continues parsing until it reaches a
263
284
checkpoint of the form [Accepted v] or [Rejected]. In the former case, it
264
- returns [v]. In the latter case, it raises the exception [Error]. *)
285
+ returns [v]. In the latter case, it raises the exception [Error].
286
+ The optional argument [strategy], whose default value is [`Legacy],
287
+ is passed to [resume] and influences the error-handling strategy. *)
265
288
266
- val loop : supplier -> 'a checkpoint -> 'a
289
+ val loop : ? strategy : strategy -> supplier -> 'a checkpoint -> 'a
267
290
268
291
(* [loop_handle succeed fail supplier checkpoint] begins parsing from
269
292
[checkpoint], reading tokens from [supplier]. It continues parsing until
@@ -272,10 +295,10 @@ module type INCREMENTAL_ENGINE = sig
272
295
observed first). In the former case, it calls [succeed v]. In the latter
273
296
case, it calls [fail] with this checkpoint. It cannot raise [Error].
274
297
275
- This means that Menhir's traditional error-handling procedure (which pops
276
- the stack until a state that can act on the [error] token is found) does
277
- not get a chance to run. Instead, the user can implement her own error
278
- handling code, in the [fail] continuation. *)
298
+ This means that Menhir's error-handling procedure does not get a chance
299
+ to run. For this reason, there is no [strategy] parameter. Instead, the
300
+ user can implement her own error handling code, in the [fail]
301
+ continuation. *)
279
302
280
303
val loop_handle :
281
304
('a -> 'answer ) ->
@@ -943,6 +966,7 @@ module type MONOLITHIC_ENGINE = sig
943
966
exception Error
944
967
945
968
val entry :
969
+ (* strategy: *) [ `Legacy | `Simplified ] -> (* see [IncrementalEngine] *)
946
970
state ->
947
971
(Lexing .lexbuf -> token ) ->
948
972
Lexing .lexbuf ->
@@ -1066,12 +1090,20 @@ type 'a buffer
1066
1090
which internally relies on [lexer] and updates [buffer] on the fly whenever
1067
1091
a token is demanded. *)
1068
1092
1093
+ (* The type of the buffer is [(position * position) buffer], which means that
1094
+ it stores two pairs of positions, which are the start and end positions of
1095
+ the last two tokens. *)
1096
+
1069
1097
open Lexing
1070
1098
1071
1099
val wrap :
1072
1100
(lexbuf -> 'token ) ->
1073
1101
(position * position ) buffer * (lexbuf -> 'token )
1074
1102
1103
+ val wrap_supplier :
1104
+ (unit -> 'token * position * position ) ->
1105
+ (position * position ) buffer * (unit -> 'token * position * position )
1106
+
1075
1107
(* [show f buffer] prints the contents of the buffer, producing a string that
1076
1108
is typically of the form "after '%s' and before '%s'". The function [f] is
1077
1109
used to print an element. The buffer MUST be nonempty. *)
@@ -1084,6 +1116,76 @@ val show: ('a -> string) -> 'a buffer -> string
1084
1116
val last : 'a buffer -> 'a
1085
1117
1086
1118
(* -------------------------------------------------------------------------- *)
1119
+
1120
+ (* [extract text (pos1, pos2)] extracts the sub-string of [text] delimited
1121
+ by the positions [pos1] and [pos2]. *)
1122
+
1123
+ val extract : string -> position * position -> string
1124
+
1125
+ (* [sanitize text] eliminates any special characters from the text [text].
1126
+ A special character is a character whose ASCII code is less than 32.
1127
+ Every special character is replaced with a single space character. *)
1128
+
1129
+ val sanitize : string -> string
1130
+
1131
+ (* [compress text] replaces every run of at least one whitespace character
1132
+ with exactly one space character. *)
1133
+
1134
+ val compress : string -> string
1135
+
1136
+ (* [shorten k text] limits the length of [text] to [2k+3] characters. If the
1137
+ text is too long, a fragment in the middle is replaced with an ellipsis. *)
1138
+
1139
+ val shorten : int -> string -> string
1140
+
1141
+ (* [expand f text] searches [text] for occurrences of [$k], where [k]
1142
+ is a nonnegative integer literal, and replaces each such occurrence
1143
+ with the string [f k]. *)
1144
+
1145
+ val expand : (int -> string ) -> string -> string
1146
+ end
1147
+ module LexerUtil : sig
1148
+ (* *****************************************************************************)
1149
+ (* *)
1150
+ (* Menhir *)
1151
+ (* *)
1152
+ (* François Pottier, Inria Paris *)
1153
+ (* Yann Régis-Gianas, PPS, Université Paris Diderot *)
1154
+ (* *)
1155
+ (* Copyright Inria. All rights reserved. This file is distributed under the *)
1156
+ (* terms of the GNU Library General Public License version 2, with a *)
1157
+ (* special exception on linking, as described in the file LICENSE. *)
1158
+ (* *)
1159
+ (* *****************************************************************************)
1160
+
1161
+ open Lexing
1162
+
1163
+ (* [init filename lexbuf] initializes the lexing buffer [lexbuf] so
1164
+ that the positions that are subsequently read from it refer to the
1165
+ file [filename]. It returns [lexbuf]. *)
1166
+
1167
+ val init : string -> lexbuf -> lexbuf
1168
+
1169
+ (* [read filename] reads the entire contents of the file [filename] and
1170
+ returns a pair of this content (a string) and a lexing buffer that
1171
+ has been initialized, based on this string. *)
1172
+
1173
+ val read : string -> string * lexbuf
1174
+
1175
+ (* [newline lexbuf] increments the line counter stored within [lexbuf]. It
1176
+ should be invoked by the lexer itself every time a newline character is
1177
+ consumed. This allows maintaining the current line number in [lexbuf]. *)
1178
+
1179
+ val newline : lexbuf -> unit
1180
+
1181
+ (* [range (startpos, endpos)] prints a textual description of the range
1182
+ delimited by the start and end positions [startpos] and [endpos].
1183
+ This description is one line long and ends in a newline character.
1184
+ This description mentions the file name, the line number, and a range
1185
+ of characters on this line. The line number is correct only if [newline]
1186
+ has been correctly used, as described above. *)
1187
+
1188
+ val range : position * position -> string
1087
1189
end
1088
1190
module Printers : sig
1089
1191
(* *****************************************************************************)
@@ -1701,5 +1803,5 @@ module MakeEngineTable
1701
1803
and type nonterminal = int
1702
1804
end
1703
1805
module StaticVersion : sig
1704
- val require_20190924 : unit
1806
+ val require_20210419 : unit
1705
1807
end
0 commit comments