You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: SRC/patternSystem.cpp
+80-35Lines changed: 80 additions & 35 deletions
Original file line number
Diff line number
Diff line change
@@ -19,16 +19,16 @@
19
19
20
20
#defineINFINITE_MATCH (-(200 << 8)) // allowed to match anywhere
21
21
22
-
#defineNOT_BIT0X00010000
23
-
#defineFREEMODE_BIT0X00020000
24
-
#defineQUOTE_BIT0X00080000
22
+
#defineNOT_BIT0X00010000
23
+
#defineFREEMODE_BIT0X00020000
24
+
#defineQUOTE_BIT0X00080000
25
+
#defineNOTNOT_BIT0X00400000
25
26
#defineWILDGAP0X20000000// start of gap is 0x000000ff, limit of gap is 0x0000ff00
26
27
#defineWILDMEMORIZEGAP0X40000000// start of gap is 0x000000ff, limit of gap is 0x0000ff00
27
-
#defineWILDMEMORIZESPECIFIC0X80000000// while 0x1f0000 is wildcard index to use
28
+
#defineWILDMEMORIZESPECIFIC0X80000000// while 0x1f0000 is wildcard index to use
28
29
#defineGAP_SHIFT16
29
30
#defineSPECIFIC_SHIFT24
30
31
#defineGAPLIMITSHIFT8
31
-
#defineNOTNOT_BIT0X00400000
32
32
33
33
bool matching = false;
34
34
bool clearUnmarks = false;
@@ -286,14 +286,14 @@ static bool FindPhrase(char* word, int start,bool reverse, int & actualStart, in
286
286
return matched;
287
287
}
288
288
289
-
char* PushMatch()
289
+
staticchar* PushMatch(int used)
290
290
{
291
291
char* limit;
292
292
char* base = InfiniteStack64(limit,"PushMatch");
293
293
int* vals = (int*)base;
294
-
for (int i = 0; i < MAX_WILDCARDS; ++i) *vals++ = wildcardPosition[i];
294
+
for (int i = 0; i < used; ++i) *vals++ = wildcardPosition[i];
295
295
char* rest = (char*) vals;
296
-
for (int i = 0; i < MAX_WILDCARDS; ++i)
296
+
for (int i = 0; i < used; ++i)
297
297
{
298
298
strcpy(rest, wildcardOriginalText[i]);
299
299
rest += strlen(rest) + 1;
@@ -304,12 +304,12 @@ char* PushMatch()
304
304
return base;
305
305
}
306
306
307
-
voidPopMatch(char* base)
307
+
staticvoidPopMatch(char* base,int used)
308
308
{
309
309
int* vals = (int*)base;
310
-
for (int i = 0; i < MAX_WILDCARDS; ++i) wildcardPosition[i] = *vals++;
310
+
for (int i = 0; i < used; ++i) wildcardPosition[i] = *vals++;
311
311
char* rest = (char*)vals;
312
-
for (int i = 0; i < MAX_WILDCARDS; ++i)
312
+
for (int i = 0; i < used; ++i)
313
313
{
314
314
strcpy(wildcardOriginalText[i],rest);
315
315
rest += strlen(rest) + 1;
@@ -318,13 +318,35 @@ void PopMatch(char* base)
318
318
}
319
319
ReleaseStack(base);
320
320
}
321
+
#ifdef INFORMATION
322
+
323
+
We keep a positional range reference within the sentence where we are (positionStart and positionEnd).
324
+
Before we attempt the next match we make a backup copy (oldStart and oldEnd)
325
+
so that if the match fails, we can revert to where we were (under some circumstances).
326
+
327
+
We keep a variable firstMatched to track the first real word we have matched so far.
328
+
If the whole match is declared a failure eventually, we may be allowed to go back and
329
+
retry matching starting immediately after that location. That is, we do not do all possible backtracking
330
+
as Prolog might, but we do a cheaper form where we simply try again farther in the sentence.
331
+
Also, firstMatched is returned from a subcall, so the caller can know where to end a wildcard memorization
332
+
started before the subcall.
333
+
334
+
Some tokens create a wildcard effect, where the next thing is allowed to be some distance away.
335
+
This is tracked by wildcardSelector, and the token after the wildcard, when found, is checked to see
336
+
if its position is allowed. Inside a choice construct like [] or {}, when a choice fails,
337
+
we reset the wildcardSelector back to originalWildcardSelector so the next choice sees the same environment.
338
+
339
+
In reverse mode, positionStart and positionEnd continue to be the earlier and later ends of the range in the sentence,
340
+
but validation treats positionStart as the basis of measuring distance.
341
+
342
+
Rebindable refers to the ability to relocate firstMatched on failure (1 means we can shift from here, 3 means we enforce spacing and cannot rebind).
343
+
Some operations like < or @_0+ force a specific position, and if no firstMatch has yet happened, then you cannot change
344
+
the start location.
345
+
346
+
returnStart and returnEnd are the range of the match that happened when making a subcall.
347
+
Startposition is where we start matching from.
348
+
#endif
321
349
322
-
// NOTE: in reverse mode, positionStart is still earlier in the sentence than PositionEnd. We do not flip viewpoint.
323
-
// rebindable refers to ability to relocate firstmatched on failure (1 means we can shift from here, 3 means we enforce spacing and cannot rebind)
324
-
// returnStart and returnEnd are the range of the match that happened
325
-
// Firstmatched is a real word (not wildcard) where we first bound a match (for rebinding restarts)
326
-
// Startposition is where we start matching from
327
-
// wildcardSelector is current wildcard hunting status
328
350
boolMatch(char* buffer,char* ptr, unsignedint depth, int startposition, char* kind, int rebindable,unsignedint wildcardSelector,
329
351
int &returnstart,int& returnend,int &uppercasem,int& firstMatched,int positionStart,int positionEnd, bool reverse)
330
352
{// always STARTS past initial opening thing ( [ { and ends with closing matching thing
@@ -350,7 +372,8 @@ bool Match(char* buffer,char* ptr, unsigned int depth, int startposition, char*
350
372
int slidingStart = startposition;
351
373
firstMatched = -1; // () should return spot it started (firstMatched) so caller has ability to bind any wild card before it
352
374
if (rebindable == 1) slidingStart = positionStart = INFINITE_MATCH; // INFINITE_MATCH means we are in initial startup, allows us to match ANYWHERE forward to start
353
-
positionEnd = startposition; // we scan starting 1 after this
375
+
int originalWildcardSelector = wildcardSelector;
376
+
positionEnd = startposition; // we scan starting 1 after this
354
377
int basicStart = startposition; // we must not match real stuff any earlier than here
355
378
char* argumentText = NULL; // pushed original text from a function arg -- function arg never decodes to name another function arg, we would have expanded it instead
356
379
uppercaseFind = -1;
@@ -366,6 +389,7 @@ bool Match(char* buffer,char* ptr, unsigned int depth, int startposition, char*
366
389
if (*word == '>' && word[1] == '>') ++nextTokenStart; // skip the 1st > of >> form
367
390
nextTokenStart = SkipWhitespace(nextTokenStart+1); // ignore blanks after if token is a simple single thing like !
368
391
char c = *word;
392
+
bool foundaword = false;
369
393
if (deeptrace) Log(STDTRACELOG,(char*)" token:%s ",word);
370
394
switch(c)
371
395
{
@@ -482,7 +506,7 @@ bool Match(char* buffer,char* ptr, unsigned int depth, int startposition, char*
returnEnd,uppercasemat,whenmatched,positionStart,positionEnd,reverse); // subsection ok - it is allowed to set position vars, if ! get used, they dont matter because we fail
// d) literal quote \" - system outputs the quote only (script has nothing or blank or tab or ` after it
2289
2289
// e) internal "`xxxxx`" - argument to tcpopen pass back untouched stripping the markers on both ends - allows us to pay no attention to OTHER quotes within
2290
2290
char c;
2291
-
int n = limit; // quote must close within this limit
2291
+
int n = limit - 2; // quote must close within this limit
2292
2292
char* start = ptr;
2293
2293
char* original = buffer;
2294
2294
// "` is an internal marker of argument passed from TCPOPEN "'arguments'" ) , return the section untouched as one lump
if (!n) Log(STDTRACELOG,(char*)"bad double-quoting? %s %d %s - size is %d but limit is %d\r\n",start,currentFileLine,currentFilename,buffer-start,MAX_WORD_SIZE);
2339
+
*buffer = 0;
2340
+
if (!n) Log(STDTRACELOG,(char*)"bad double-quoting? %s %d %s - string size exceeds limit of %d\r\n",start,currentFileLine,currentFilename,limit);
0 commit comments