@@ -874,6 +874,222 @@ def check_balance(content: str, env_type: str, start_pos: int) -> int:
874874 return num_errors
875875
876876
877+ def check_math_content_validity (filename : str ) -> int :
878+ r"""
879+ Checks that content in mathematical environments ($...$, \[...\], \begin{mathpar}...\end{mathpar})
880+ consists only of LaTeX macros (commands starting with \) or known LaTeX symbols.
881+ Returns the number of errors found.
882+ """
883+ num_errors = 0
884+ original_file_str = read_file_str (filename )
885+ file_str = original_file_str
886+
887+ # Remove verbatim and code environments before checking math
888+ # These environments should not be validated
889+ verbatim_patterns = [
890+ r'\\begin\{Verbatim\}.*?\\end\{Verbatim\}' ,
891+ r'\\begin\{verbatim\}.*?\\end\{verbatim\}' ,
892+ r'\\begin\{lstlisting\}.*?\\end\{lstlisting\}' ,
893+ r'\\verb\|.*?\|' ,
894+ r'\\verb\+.*?\+' ,
895+ r'\\verb\!.*?\!' ,
896+ ]
897+
898+ for pattern in verbatim_patterns :
899+ file_str = re .sub (pattern , '' , file_str , flags = re .DOTALL )
900+
901+ # Known LaTeX mathematical symbols and operators that don't require backslash
902+ known_symbols = set ([
903+ '+' , '-' , '*' , '/' , '=' , '<' , '>' , '!' , '?' , '|' , ':' , ';' , ',' , '.' ,
904+ '(' , ')' , '[' , ']' , '{' , '}' , '_' , '^' , '~' , "'" , '`' , '"' ,
905+ '0' , '1' , '2' , '3' , '4' , '5' , '6' , '7' , '8' , '9' ,
906+ ' ' , '\n ' , '\t ' , '&' , # Whitespace and alignment
907+ ])
908+
909+ # Patterns to extract mathematical environments
910+ math_patterns = [
911+ (r'\$([^\$]+)\$' , 'inline math $...$' ),
912+ (r'\\\[(.*?)\\\]' , 'display math \\ [...\\ ]' ),
913+ (r'\\begin\{mathpar\}(.*?)\\end\{mathpar\}' , 'mathpar environment' ),
914+ ]
915+
916+ def check_content_validity (content : str , env_type : str , start_pos : int ) -> int :
917+ """Check if content consists only of valid LaTeX macros and symbols."""
918+ errors = 0
919+
920+ # Calculate line number for error reporting using original file
921+ lines_before = original_file_str [:start_pos ].count ('\n ' )
922+ line_number = lines_before + 1
923+
924+ i = 0
925+ while i < len (content ):
926+ char = content [i ]
927+
928+ # Check for LaTeX comments (% to end of line)
929+ if char == '%' :
930+ # Skip until newline
931+ while i < len (content ) and content [i ] != '\n ' :
932+ i += 1
933+ if i < len (content ):
934+ i += 1 # Skip the newline itself
935+ continue
936+
937+ # Check for LaTeX commands (start with backslash)
938+ if char == '\\ ' :
939+ # Find the end of the command
940+ j = i + 1
941+ # Command name consists of letters, or is a single special character
942+ if j < len (content ) and content [j ].isalpha ():
943+ cmd_start = j
944+ while j < len (content ) and content [j ].isalpha ():
945+ j += 1
946+ cmd_name = content [cmd_start :j ]
947+ else :
948+ # Single character command like \\ or \{ or \}
949+ cmd_name = content [j ] if j < len (content ) else ''
950+ j += 1
951+
952+ # Skip optional asterisk (e.g., \inferrule*)
953+ if j < len (content ) and content [j ] == '*' :
954+ j += 1
955+
956+ # Skip any whitespace and optional arguments in square brackets
957+ # For most commands, we don't skip mandatory arguments in braces - we want to check their content
958+ # Exception: \begin and \end commands have environment names in braces that should be skipped
959+ # Exception: \text, \texttt, \textXY commands contain regular text, not math, so skip their content
960+ while True :
961+ # Skip whitespace
962+ while j < len (content ) and content [j ] in ' \t \n ' :
963+ j += 1
964+
965+ # Check for optional argument [...]
966+ # These we DO skip because they're not typically math content (they're options/names)
967+ if j < len (content ) and content [j ] == '[' :
968+ bracket_count = 1
969+ j += 1
970+ while j < len (content ) and bracket_count > 0 :
971+ if content [j ] == '\\ ' and j + 1 < len (content ):
972+ j += 2 # Skip escaped characters
973+ continue
974+ elif content [j ] == '[' :
975+ bracket_count += 1
976+ elif content [j ] == ']' :
977+ bracket_count -= 1
978+ j += 1
979+ continue
980+
981+ # For \begin and \end, skip the environment name argument
982+ # For \text, \texttt, \textXY (any text command), skip the text content
983+ # For \hypertarget, skip the target name
984+ # For \emph, skip the emphasized text
985+ if j < len (content ) and content [j ] == '{' :
986+ should_skip = False
987+
988+ # Skip for \begin and \end
989+ if cmd_name in ('begin' , 'end' ):
990+ should_skip = True
991+ # Skip for \text and any \textXY variant
992+ elif cmd_name == 'text' or (cmd_name .startswith ('text' ) and len (cmd_name ) > 4 and cmd_name [4 :].isalpha ()):
993+ should_skip = True
994+ # Skip for \hypertarget
995+ elif cmd_name == 'hypertarget' :
996+ should_skip = True
997+ # Skip for \emph
998+ elif cmd_name == 'emph' :
999+ should_skip = True
1000+
1001+ if should_skip :
1002+ brace_count = 1
1003+ j += 1
1004+ while j < len (content ) and brace_count > 0 :
1005+ if content [j ] == '\\ ' and j + 1 < len (content ):
1006+ j += 2 # Skip escaped characters
1007+ continue
1008+ elif content [j ] == '{' :
1009+ brace_count += 1
1010+ elif content [j ] == '}' :
1011+ brace_count -= 1
1012+ j += 1
1013+ continue
1014+
1015+ # No more arguments to skip
1016+ break
1017+
1018+ i = j
1019+ continue
1020+
1021+ # Check if it's a known symbol
1022+ if char in known_symbols :
1023+ i += 1
1024+ continue
1025+
1026+ # Check if it's a letter (variables are allowed in math mode)
1027+ # But only single letters - multi-letter identifiers should use LaTeX commands
1028+ if char .isalpha ():
1029+ # Check if this starts a sequence of letters
1030+ j = i + 1
1031+ while j < len (content ) and content [j ].isalpha ():
1032+ j += 1
1033+ # If we have 2 or more consecutive letters, it's an error
1034+ if j - i >= 2 :
1035+ word = content [i :j ]
1036+ context_start = max (0 , i - 10 )
1037+ context_end = min (len (content ), j + 10 )
1038+ context = content [context_start :context_end ]
1039+ print (f"{ filename } :{ line_number } : Multi-letter identifier '{ word } ' in { env_type } should use LaTeX command, context: ...{ context } ..." )
1040+ errors += 1
1041+ i = j
1042+ continue
1043+ i += 1
1044+ continue
1045+
1046+ # If we get here, it's an unknown/invalid character
1047+ # Get context for error message
1048+ context_start = max (0 , i - 10 )
1049+ context_end = min (len (content ), i + 10 )
1050+ context = content [context_start :context_end ]
1051+ print (f"{ filename } :{ line_number } : Invalid character '{ char } ' (ord={ ord (char )} ) in { env_type } , context: ...{ context } ..." )
1052+ errors += 1
1053+ i += 1
1054+
1055+ return errors
1056+
1057+ # Build a list of excluded ranges (verbatim blocks) for efficient checking
1058+ excluded_ranges = []
1059+ for verb_pattern in verbatim_patterns :
1060+ for verb_match in re .finditer (verb_pattern , original_file_str , re .DOTALL ):
1061+ excluded_ranges .append ((verb_match .start (), verb_match .end ()))
1062+
1063+ # Sort ranges for efficient binary search-like checking
1064+ excluded_ranges .sort ()
1065+
1066+ def is_in_excluded_range (start : int , end : int ) -> bool :
1067+ """Check if a range overlaps with any excluded range."""
1068+ for exc_start , exc_end in excluded_ranges :
1069+ # If excluded range starts after our range ends, no more matches possible
1070+ if exc_start >= end :
1071+ break
1072+ # Check for overlap
1073+ if not (end <= exc_start or start >= exc_end ):
1074+ return True
1075+ return False
1076+
1077+ # Check each type of mathematical environment
1078+ for pattern , env_type in math_patterns :
1079+ for match in re .finditer (pattern , original_file_str , re .DOTALL ):
1080+ content = match .group (1 )
1081+ start_pos = match .start ()
1082+ end_pos = match .end ()
1083+
1084+ # Skip if this math environment is inside a verbatim block
1085+ if is_in_excluded_range (start_pos , end_pos ):
1086+ continue
1087+
1088+ num_errors += check_content_validity (content , env_type , start_pos )
1089+
1090+ return num_errors
1091+
1092+
8771093def check_relation_references (latex_files : list [str ]) -> int :
8781094 r"""
8791095 Checks that for each 'relation <name>' and 'function <name>' in any .spec file,
@@ -995,6 +1211,7 @@ def main():
9951211 check_rules ,
9961212 check_mathpar_macro_usage ,
9971213 check_balanced_parentheses_in_math ,
1214+ check_math_content_validity ,
9981215 ],
9991216 )
10001217
0 commit comments