Skip to content

Commit 1725dfc

Browse files
[asl reference] fixed free-standing words in mathematical environments
1 parent 9e8a50e commit 1725dfc

File tree

7 files changed

+227
-10
lines changed

7 files changed

+227
-10
lines changed

asllib/doc/Expressions.tex

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ \subsection{Typing}
183183
\item $\vx$ is not bound via the $\localstoragetypes$ map of the local component of $\tenv$;
184184
\item $\vx$ is bound to $(\tty, \GDKConstant)$ via the $\globalstoragetypes$ map of the global component of $\tenv$;
185185
\item $\vx$ is bound to $\vv$ via the $\constantvalues$ map of the global component of $\tenv$;
186-
\item define $newe$ as the literal expression for $\vv$;
186+
\item define $\newe$ as the literal expression for $\vv$;
187187
\item define $\vses$ as the empty set.
188188
\end{itemize}
189189

@@ -192,7 +192,7 @@ \subsection{Typing}
192192
\item $\vx$ is not bound via the $\localstoragetypes$ map of the local component of $\tenv$;
193193
\item $\vx$ is bound to $(\tty, k)$ via the $\globalstoragetypes$ map of the global component of $\tenv$;
194194
\item either $\vx$ is not bound in the $\constantvalues$ map of the global component of $\tenv$ or $k$ is not $\GDKConstant$;
195-
\item define $newe$ as $\ve$;
195+
\item define $\newe$ as $\ve$;
196196
\item define $\vses$ as the \sideeffectsetterm\ produced from $k$ by $\sesgdk$.
197197
\end{itemize}
198198

@@ -2038,7 +2038,7 @@ \subsection{Typing}
20382038
\begin{itemize}
20392039
\item $\vbitfields$ is a list with \head\ $\vfield$ and \tail\ $\vbitfieldsone$;
20402040
\item applying $\bitfieldgetname$ to $\vfield$ yields $\namep$, which is different to $\name$;
2041-
\item applying $\findbitfieldsslices$ to $\name$ and $vbitfieldsone$ yields $\vslices$\ProseOrTypeError.
2041+
\item applying $\findbitfieldsslices$ to $\name$ and $\vbitfieldsone$ yields $\vslices$\ProseOrTypeError.
20422042
\end{itemize}
20432043

20442044
\item \AllApplyCase{empty}
@@ -2812,7 +2812,7 @@ \subsection{Semantics}
28122812
\item evaluating whether the pattern $\vp$ matches the value $\vvone$ in $\env$
28132813
results in \\
28142814
$\ResultPattern(\vv, \vgtwo)$\ProseOrAbnormal,
2815-
where $vv$ is a native Boolean that determines whether there is indeed a match;
2815+
where $\vv$ is a native Boolean that determines whether there is indeed a match;
28162816
\item $\vg$ is the ordered composition of $\vgone$ and $\vgtwo$ with the $\asldata$ edge.
28172817
\end{itemize}
28182818
\FormallyParagraph

asllib/doc/PatternMatching.tex

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -650,7 +650,7 @@ \subsection{Semantics}
650650
\[
651651
\begin{array}{|c|c|c|c|}
652652
\hline
653-
\textbf{$\maskmatch$} & 0 & 1 & \vx\\
653+
\maskmatch & 0 & 1 & \vx\\
654654
\hline
655655
0 & \True & \False & \True\\
656656
\hline

asllib/doc/Statements.tex

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -779,7 +779,7 @@ \subsection{Typing}
779779
\FormallyParagraph
780780
\begin{mathpar}
781781
\inferrule[Well-Constrained]{%
782-
\techeck(p = \PrecisionFull, PrecisionLostDefining) \typearrow \True\OrTypeError
782+
\techeck(p = \PrecisionFull, \PrecisionLostDefining) \typearrow \True\OrTypeError
783783
}{%
784784
\checknoprecisionloss(\overname{\TInt(\WellConstrained(\Ignore, p))}{\vt}) \typearrow \True
785785
}

asllib/doc/SubprogramCalls.tex

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ \chapter{Subprogram Calls\label{chap:SubprogramCalls}}
253253
\begin{itemize}
254254
\item \Proseeqdef{$\vt$}{the \head{} of $\argtypes$};
255255
\item applying $\getbitvectorwidth$ to $\tenv$ and $\vt$ yields $\vwidth$\ProseOrTypeError;
256-
\item \Proseeqdef{$paramtype$}{the \wellconstrainedintegertypeterm{} for the single
256+
\item \Proseeqdef{$\paramtype$}{the \wellconstrainedintegertypeterm{} for the single
257257
constraint consisting of the \exactconstraintterm{} for $\vwidth$};
258258
\item \Proseeqdef{$\paramsone$}{the list whose \head{} is $\params$ and \tail{} is
259259
the tuple consisting of $\paramtype$, $\vwidth$, and the empty list of \sideeffectdescriptorsetsterm}.

asllib/doc/SubprogramDeclarations.tex

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1056,7 +1056,7 @@ \section{Accessors\label{sec:Accessors}}
10561056
$\funcsig.\funcreturntype$ if the singleton set for $\ttyp$,
10571057
and the empty list, otherwise.}
10581058
\item \Proseeqdef{$\argtypes$}{the types of arguments in $\funcsig.\funcargs$};
1059-
\item \Proseeqdef{$tys$}{the \Proselist{$\vreturntype$}{$\argtypes$}}.
1059+
\item \Proseeqdef{$\tys$}{the \Proselist{$\vreturntype$}{$\argtypes$}}.
10601060
\end{itemize}
10611061

10621062
\FormallyParagraph
@@ -1334,7 +1334,7 @@ \section{Accessors\label{sec:Accessors}}
13341334
\item \AllApplyCase{exact}
13351335
\begin{itemize}
13361336
\item $\vc$ is an exact constraint, that is, $\ConstraintExact(\ve)$;
1337-
\item applying $\paramsofexpr$ to $\ve$ in $\tenv$ yields $ids$.
1337+
\item applying $\paramsofexpr$ to $\ve$ in $\tenv$ yields $\ids$.
13381338
\end{itemize}
13391339

13401340
\item \AllApplyCase{range}

asllib/doc/TypeDeclarations.tex

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -374,7 +374,7 @@ \chapter{Type Declarations\label{chap:TypeDeclarations}}
374374
G^\tenv.\declaredtypes(\vsuper) = (\vt, \Ignore)\\
375375
{
376376
\begin{array}{r}
377-
\techeck(\astlabel(\vt) \in \{\TRecord, \TException\}, ExpectedStructuredType) \typearrow \\ \True \OrTypeError
377+
\techeck(\astlabel(\vt) \in \{\TRecord, \TException\}, \UnexpectedType) \typearrow \\ \True \OrTypeError
378378
\end{array}
379379
}\\
380380
\vt \eqname L(\fields)\\

asllib/doc/doclint.py

Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -874,6 +874,222 @@ def check_balance(content: str, env_type: str, start_pos: int) -> int:
874874
return num_errors
875875

876876

877+
def check_math_content_validity(filename: str) -> int:
878+
r"""
879+
Checks that content in mathematical environments ($...$, \[...\], \begin{mathpar}...\end{mathpar})
880+
consists only of LaTeX macros (commands starting with \) or known LaTeX symbols.
881+
Returns the number of errors found.
882+
"""
883+
num_errors = 0
884+
original_file_str = read_file_str(filename)
885+
file_str = original_file_str
886+
887+
# Remove verbatim and code environments before checking math
888+
# These environments should not be validated
889+
verbatim_patterns = [
890+
r'\\begin\{Verbatim\}.*?\\end\{Verbatim\}',
891+
r'\\begin\{verbatim\}.*?\\end\{verbatim\}',
892+
r'\\begin\{lstlisting\}.*?\\end\{lstlisting\}',
893+
r'\\verb\|.*?\|',
894+
r'\\verb\+.*?\+',
895+
r'\\verb\!.*?\!',
896+
]
897+
898+
for pattern in verbatim_patterns:
899+
file_str = re.sub(pattern, '', file_str, flags=re.DOTALL)
900+
901+
# Known LaTeX mathematical symbols and operators that don't require backslash
902+
known_symbols = set([
903+
'+', '-', '*', '/', '=', '<', '>', '!', '?', '|', ':', ';', ',', '.',
904+
'(', ')', '[', ']', '{', '}', '_', '^', '~', "'", '`', '"',
905+
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
906+
' ', '\n', '\t', '&', # Whitespace and alignment
907+
])
908+
909+
# Patterns to extract mathematical environments
910+
math_patterns = [
911+
(r'\$([^\$]+)\$', 'inline math $...$'),
912+
(r'\\\[(.*?)\\\]', 'display math \\[...\\]'),
913+
(r'\\begin\{mathpar\}(.*?)\\end\{mathpar\}', 'mathpar environment'),
914+
]
915+
916+
def check_content_validity(content: str, env_type: str, start_pos: int) -> int:
917+
"""Check if content consists only of valid LaTeX macros and symbols."""
918+
errors = 0
919+
920+
# Calculate line number for error reporting using original file
921+
lines_before = original_file_str[:start_pos].count('\n')
922+
line_number = lines_before + 1
923+
924+
i = 0
925+
while i < len(content):
926+
char = content[i]
927+
928+
# Check for LaTeX comments (% to end of line)
929+
if char == '%':
930+
# Skip until newline
931+
while i < len(content) and content[i] != '\n':
932+
i += 1
933+
if i < len(content):
934+
i += 1 # Skip the newline itself
935+
continue
936+
937+
# Check for LaTeX commands (start with backslash)
938+
if char == '\\':
939+
# Find the end of the command
940+
j = i + 1
941+
# Command name consists of letters, or is a single special character
942+
if j < len(content) and content[j].isalpha():
943+
cmd_start = j
944+
while j < len(content) and content[j].isalpha():
945+
j += 1
946+
cmd_name = content[cmd_start:j]
947+
else:
948+
# Single character command like \\ or \{ or \}
949+
cmd_name = content[j] if j < len(content) else ''
950+
j += 1
951+
952+
# Skip optional asterisk (e.g., \inferrule*)
953+
if j < len(content) and content[j] == '*':
954+
j += 1
955+
956+
# Skip any whitespace and optional arguments in square brackets
957+
# For most commands, we don't skip mandatory arguments in braces - we want to check their content
958+
# Exception: \begin and \end commands have environment names in braces that should be skipped
959+
# Exception: \text, \texttt, \textXY commands contain regular text, not math, so skip their content
960+
while True:
961+
# Skip whitespace
962+
while j < len(content) and content[j] in ' \t\n':
963+
j += 1
964+
965+
# Check for optional argument [...]
966+
# These we DO skip because they're not typically math content (they're options/names)
967+
if j < len(content) and content[j] == '[':
968+
bracket_count = 1
969+
j += 1
970+
while j < len(content) and bracket_count > 0:
971+
if content[j] == '\\' and j + 1 < len(content):
972+
j += 2 # Skip escaped characters
973+
continue
974+
elif content[j] == '[':
975+
bracket_count += 1
976+
elif content[j] == ']':
977+
bracket_count -= 1
978+
j += 1
979+
continue
980+
981+
# For \begin and \end, skip the environment name argument
982+
# For \text, \texttt, \textXY (any text command), skip the text content
983+
# For \hypertarget, skip the target name
984+
# For \emph, skip the emphasized text
985+
if j < len(content) and content[j] == '{':
986+
should_skip = False
987+
988+
# Skip for \begin and \end
989+
if cmd_name in ('begin', 'end'):
990+
should_skip = True
991+
# Skip for \text and any \textXY variant
992+
elif cmd_name == 'text' or (cmd_name.startswith('text') and len(cmd_name) > 4 and cmd_name[4:].isalpha()):
993+
should_skip = True
994+
# Skip for \hypertarget
995+
elif cmd_name == 'hypertarget':
996+
should_skip = True
997+
# Skip for \emph
998+
elif cmd_name == 'emph':
999+
should_skip = True
1000+
1001+
if should_skip:
1002+
brace_count = 1
1003+
j += 1
1004+
while j < len(content) and brace_count > 0:
1005+
if content[j] == '\\' and j + 1 < len(content):
1006+
j += 2 # Skip escaped characters
1007+
continue
1008+
elif content[j] == '{':
1009+
brace_count += 1
1010+
elif content[j] == '}':
1011+
brace_count -= 1
1012+
j += 1
1013+
continue
1014+
1015+
# No more arguments to skip
1016+
break
1017+
1018+
i = j
1019+
continue
1020+
1021+
# Check if it's a known symbol
1022+
if char in known_symbols:
1023+
i += 1
1024+
continue
1025+
1026+
# Check if it's a letter (variables are allowed in math mode)
1027+
# But only single letters - multi-letter identifiers should use LaTeX commands
1028+
if char.isalpha():
1029+
# Check if this starts a sequence of letters
1030+
j = i + 1
1031+
while j < len(content) and content[j].isalpha():
1032+
j += 1
1033+
# If we have 2 or more consecutive letters, it's an error
1034+
if j - i >= 2:
1035+
word = content[i:j]
1036+
context_start = max(0, i - 10)
1037+
context_end = min(len(content), j + 10)
1038+
context = content[context_start:context_end]
1039+
print(f"{filename}:{line_number}: Multi-letter identifier '{word}' in {env_type} should use LaTeX command, context: ...{context}...")
1040+
errors += 1
1041+
i = j
1042+
continue
1043+
i += 1
1044+
continue
1045+
1046+
# If we get here, it's an unknown/invalid character
1047+
# Get context for error message
1048+
context_start = max(0, i - 10)
1049+
context_end = min(len(content), i + 10)
1050+
context = content[context_start:context_end]
1051+
print(f"{filename}:{line_number}: Invalid character '{char}' (ord={ord(char)}) in {env_type}, context: ...{context}...")
1052+
errors += 1
1053+
i += 1
1054+
1055+
return errors
1056+
1057+
# Build a list of excluded ranges (verbatim blocks) for efficient checking
1058+
excluded_ranges = []
1059+
for verb_pattern in verbatim_patterns:
1060+
for verb_match in re.finditer(verb_pattern, original_file_str, re.DOTALL):
1061+
excluded_ranges.append((verb_match.start(), verb_match.end()))
1062+
1063+
# Sort ranges for efficient binary search-like checking
1064+
excluded_ranges.sort()
1065+
1066+
def is_in_excluded_range(start: int, end: int) -> bool:
1067+
"""Check if a range overlaps with any excluded range."""
1068+
for exc_start, exc_end in excluded_ranges:
1069+
# If excluded range starts after our range ends, no more matches possible
1070+
if exc_start >= end:
1071+
break
1072+
# Check for overlap
1073+
if not (end <= exc_start or start >= exc_end):
1074+
return True
1075+
return False
1076+
1077+
# Check each type of mathematical environment
1078+
for pattern, env_type in math_patterns:
1079+
for match in re.finditer(pattern, original_file_str, re.DOTALL):
1080+
content = match.group(1)
1081+
start_pos = match.start()
1082+
end_pos = match.end()
1083+
1084+
# Skip if this math environment is inside a verbatim block
1085+
if is_in_excluded_range(start_pos, end_pos):
1086+
continue
1087+
1088+
num_errors += check_content_validity(content, env_type, start_pos)
1089+
1090+
return num_errors
1091+
1092+
8771093
def check_relation_references(latex_files: list[str]) -> int:
8781094
r"""
8791095
Checks that for each 'relation <name>' and 'function <name>' in any .spec file,
@@ -995,6 +1211,7 @@ def main():
9951211
check_rules,
9961212
check_mathpar_macro_usage,
9971213
check_balanced_parentheses_in_math,
1214+
check_math_content_validity,
9981215
],
9991216
)
10001217

0 commit comments

Comments
 (0)