tarun (1)
tarun (1)
Aim: Design a lexical analyzer for a given language. The lexical analyzer
should ignore redundant spaces, tabs, and new lines. It should also ignore
comments. Although the syntax specification states that identifiers can be
arbitrarily long, you may restrict the length to some reasonable value.
Simulate the same in C language.
Program:
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#define MAX_IDENTIFIER_LEN 30
#define BUFFER_SIZE 1000
/* Reserved words of the analyzed language; is_keyword() results decide whether a
 * word is printed as "Keyword" or as "Identifier". */
const char *keywords[] = {"if", "else", "while", "for", "int", "float", "return", "void",
"main"};
/* Derived from the table so that adding or removing a keyword cannot leave the
 * count out of date. */
const int keyword_count = (int)(sizeof keywords / sizeof keywords[0]);
if (is_keyword(buffer)) {
printf("Keyword: %s\n", buffer);
} else if (i > 0) {
printf("Identifier: %s\n", buffer);
}
}
/* Writes one "Operator: <c>" line to standard output for the operator character c. */
void process_operator(char c) {
    fputs("Operator: ", stdout);
    putchar(c);
    putchar('\n');
}
char c;
while ((c = fgetc(file)) != EOF) {
if (isspace(c)) {
skip_whitespace(file);
} else if (c == '/') {
char next = fgetc(file);
if (next == '/' || next == '*') {
ungetc(next, file);
skip_comments(file);
2
} else {
ungetc(next, file);
process_operator(c);
}
} else if (isalnum(c) || c == '_') {
ungetc(c, file);
process_token(file);
} else if (strchr("+-*/=<>!&|;", c)) {
process_operator(c);
}
}
fclose(file);
}
/* Entry point: runs the lexical analyzer on a fixed input file. */
int main() {
    // Each "\\" in the literal denotes a single backslash of the Windows path.
    // The literal must open with exactly one quote; a doubled quote would form an
    // empty string followed by unparsable text.
    const char *filename =
        "C:\\Users\\91740\\OneDrive\\Desktop\\Tarun\\LexPrograms\\text.txt";
    lexical_analyzer(filename);
    return 0;
}
3
Output:
4
PRACTICAL 2
Aim: Write a C program to identify whether a given line is a comment or
not.
Program:
#include <stdio.h>
#include <string.h>
/*
 * Reads one line from standard input and reports whether it is a single-line
 * comment, a block comment that is closed on the same line, a block comment
 * that is left open, or not a comment at all.
 *
 * Returns 0 after classifying the line, 1 if no line could be read.
 */
int main(void) {
    char com[100]; // Allow longer input
    int a = 0;     // Becomes 1 once the closing "*/" has been found
    printf("Enter a line of code: ");
    // On EOF or a read error the buffer contents are indeterminate, so stop here.
    if (fgets(com, sizeof(com), stdin) == NULL) {
        fprintf(stderr, "No input could be read.\n");
        return 1;
    }
    // Remove trailing newline character if present
    size_t len = strlen(com);
    if (len > 0 && com[len - 1] == '\n') {
        com[--len] = '\0';
    }
    if (com[0] == '/') {
        if (com[1] == '/') {
            printf("It is a single-line comment.\n");
        } else if (com[1] == '*') {
            // Start at index 2 so the '*' of the opening delimiter is not
            // mistaken for the start of the closing one (as in "/*/").
            for (size_t i = 2; i + 1 < len; i++) {
                if (com[i] == '*' && com[i + 1] == '/') {
                    printf("It is a multi-line comment.\n");
                    a = 1;
                    break;
                }
            }
            if (a == 0) {
                printf("It is not a complete multi-line comment.\n");
            }
        } else {
            printf("It is not a comment.\n");
        }
    } else {
        printf("It is not a comment.\n");
    }
    return 0;
}
5
Output:
6
PRACTICAL 3
Program:
#include <stdio.h>
#include <string.h>
#include <ctype.h>
/* Words that is_keyword() treats as reserved and that is_valid_identifier()
 * therefore rejects. */
const char *keywords[] = {
    "auto", "break", "case", "char", "const", "continue", "default", "do", "double",
    "else", "enum", "extern", "float", "for", "goto", "if", "inline", "int",
    "long", "register", "restrict", "return", "short", "signed", "sizeof",
    "static", "struct", "switch", "typedef", "union", "unsigned", "void",
    "volatile", "while", "_Alignas", "_Alignof", "_Atomic", "_Bool", "_Complex",
    "_Generic", "_Imaginary", "_Noreturn", "_Static_assert", "_Thread_local"
};
/* Derived from the table so that editing the list cannot leave the count stale. */
const int keyword_count = (int)(sizeof keywords / sizeof keywords[0]);
/* Returns 1 when word is exactly equal to an entry of the keywords table, 0 otherwise. */
int is_keyword(const char *word) {
    int idx = 0;
    while (idx < keyword_count) {
        if (!strcmp(keywords[idx], word)) {
            return 1;
        }
        idx++;
    }
    return 0;
}
/*
 * Returns 1 if identifier is non-empty, starts with a letter or underscore,
 * continues with letters, digits or underscores only, and is not a keyword
 * according to is_keyword(); returns 0 otherwise.
 */
int is_valid_identifier(const char *identifier) {
    /* The <ctype.h> functions require an argument representable as unsigned char
     * (or EOF). Plain char may be negative, so convert before every call. */
    unsigned char first = (unsigned char)identifier[0];
    if (first == '\0') {
        return 0;
    }
    if (!isalpha(first) && first != '_') {
        return 0;
    }
    for (const char *p = identifier + 1; *p != '\0'; p++) {
        unsigned char ch = (unsigned char)*p;
        if (!isalnum(ch) && ch != '_') {
            return 0;
        }
    }
    if (is_keyword(identifier)) {
        return 0;
    }
    return 1;
}
/* Reads one line from standard input and reports whether it is a valid identifier. */
int main() {
    char identifier[100];
    printf("Enter an identifier: ");
    /* The buffer must be filled before it is checked; on EOF or a read error fall
     * back to the empty string, which is reported as not valid. */
    if (fgets(identifier, sizeof(identifier), stdin) == NULL) {
        identifier[0] = '\0';
    }
    /* Drop the line terminator that fgets keeps. */
    identifier[strcspn(identifier, "\r\n")] = '\0';
    if (is_valid_identifier(identifier)) {
        printf("'%s' is a valid identifier.\n", identifier);
    } else {
        printf("'%s' is NOT a valid identifier.\n", identifier);
    }
    return 0;
}
Output:
8
PRACTICAL 4
9
Output:
10
PRACTICAL 5
Aim: To study the Lexical Analyzer Generator (LEX) and Flex (Fast
Lexical Analyzer).
Lexical analyzers are tools used to tokenize input streams into meaningful components
(tokens) such as keywords, identifiers, operators, and separators. LEX and FLEX are two
popular tools for generating lexical analyzers.
• This file contains a yylex() function that processes the input and matches patterns.
3. Compilation and Execution:
• Generate lex.yy.c from the specification with lex, then compile and run it:
lex file.l
11
cc lex.yy.c -o lexer
./lexer
13
PRACTICAL 6(a)
Aim: Create a Lexer to take input from text file and count no of characters,
no. of lines & no. of words.
Program:
%{
#include <stdio.h>
int num_lines = 0; // To store the number of lines
int num_words = 0; // To store the number of words
int num_chars = 0; // To store the number of characters
%}
%%
%%
/* Called by the scanner when it reaches end of input.
 * Declared with (void) so the definition is a real prototype. */
int yywrap(void) {
    return 1; // Return 1 to indicate end of input
}
15
PRACTICAL 6(b)
Program:
%{
/* Counts the vowels and consonants among the letters read from standard input. */
#include <stdio.h>
int num_vowels = 0; // To store the number of vowels
int num_consonants = 0; // To store the number of consonants
%}
%%
[aeiouAEIOU] { num_vowels++; } // Increment vowel count
[b-df-hj-np-tv-zB-DF-HJ-NP-TV-Z] { num_consonants++; } // Increment consonant count
[^a-zA-Z] { /* Ignore non-alphabetic characters */ }
%%
/* Called by the scanner at end of input; returning 1 means there is nothing more
   to read. Defined here so the program links without the lex library. */
int yywrap(void) {
    return 1;
}
int main() {
    printf("Enter a string: ");
    yylex(); // Start lexical analysis
    printf("Number of vowels: %d\n", num_vowels);
    printf("Number of consonants: %d\n", num_consonants);
    return 0;
}
Output:
16
PRACTICAL 7(a)
Aim: Write a Lex program to print out all numbers from the given file.
Program:
%{
#include <stdio.h>
#include <stdlib.h>
%}
%%
%%
/* Called by the scanner when it reaches end of input.
 * Declared with (void) so the definition is a real prototype. */
int yywrap(void) {
    return 1; // Indicate the end of input
}
fclose(file);
return 0;
}
Output:
17
18
PRACTICAL 7(b)
Aim: Write a Lex program to print out all HTML tags in a file.
Program:
%{
/* Prints every HTML tag found in the input, one per line; all other text is discarded. */
#include <stdio.h>
%}
%%
"<"[^>]*">" { printf("%s\n", yytext); } /* Match HTML tags, anything between < > */
.|\n { /* Ignore other characters; newline is listed because . does not match it and the default rule would echo it */ }
%%
int yywrap() {
    return 1; /* End of input */
}
19
Output:
20
PRACTICAL 7(c)
Aim: Write a Lex program which adds line numbers to the given file and
display the same onto the standard output.
Program:
%{
/* Copies the file named on the command line to standard output, prefixing each
   line with its line number. */
#include <stdio.h>
int line_number = 1; // initializing line number to 1
%}
%%
[^\n]*\n { printf("%10d %s", line_number++, yytext); } // A complete line, including its newline
[^\n]+ { printf("%10d %s\n", line_number++, yytext); } // A final line that has no trailing newline
%%
int yywrap() {
    return 1; // End of input
}
int main(int argc, char *argv[]) {
    if (argc != 2) {
        printf("Usage: %s <filename>\n", argv[0]);
        return 1;
    }
    // Open the file passed as an argument
    extern FILE *yyin;
    yyin = fopen(argv[1], "r");
    if (!yyin) {
        perror("File opening failed");
        return 1;
    }
    yylex(); // Start lexical analysis to process the file
    fclose(yyin); // Close the file after processing
    return 0;
}
Output:
21
PRACTICAL 8
Program:
%{
#include <stdio.h>
%%
"//".* { comment_lines++; } // Single-line comment
"/*"([^*]|\*+[^*/])*\*+"/" { comment_lines++; } // Multi-line comment; the closing slash is quoted because a bare slash means trailing context in lex
\n { /* Ignore newlines for counting lines */ }
. { fputc(yytext[0], output_file); } // Copy non-comment characters to the output file
%%
22
// Print the count of comment lines
printf("Number of comment lines: %d\n", comment_lines);
return 0;
}
Output:
Output.c
23
PRACTICAL 9
Program:
#include <stdio.h>
#include <string.h>
#define MAX_RULES 6
#define MAX_TERMINALS 6
#define MAX_NON_TERMINALS 3
#define MAX_INPUT_LEN 100
int input_index = 0;
char current_input;
// Parsing loop
while (top >= 0) {
current_input = input[input_index];
char top_symbol = stack[top];
// If the top of the stack is a terminal, compare it with the input symbol
if (top_symbol >= 'a' && top_symbol <= 'z') {
if (top_symbol == current_input) {
printf("Match: %c\n", current_input);
input_index++;
top--; // Pop from the stack
} else {
printf("Error: Expected %c but found %c\n", top_symbol, current_input);
return;
}
}
// If the top of the stack is a non-terminal, look it up in the parsing table
else if (top_symbol >= 'A' && top_symbol <= 'Z') {
int row = get_index(top_symbol); // Get the index for the non-terminal
int col = get_index(current_input); // Get the index for the terminal
if (col == -1) {
printf("Error: Invalid terminal '%c' in input\n", current_input);
return;
}
if (strlen(parse_table[row][col]) > 0) {
// Pop the top symbol and push the production rule
top--;
char* production = parse_table[row][col];
printf("Using production: %c -> %s\n", top_symbol, production);
25
// Push the production to the stack (reverse the order)
for (int i = strlen(production) - 1; i >= 0; i--) {
stack[++top] = production[i];
}
} else {
printf("Error: No production rule for %c with input %c\n", top_symbol,
current_input);
return;
}
}
}
if (input[input_index] == '\0') {
printf("Parsing successful!\n");
} else {
printf("Error: Input string not completely parsed\n");
}
}
int main() {
// Initialize the grammar
grammar[0].lhs = 'S';
strcpy(grammar[0].rhs, "aA");
grammar[1].lhs = 'S';
strcpy(grammar[1].rhs, "bB");
grammar[2].lhs = 'A';
strcpy(grammar[2].rhs, "cS");
grammar[3].lhs = 'A';
strcpy(grammar[3].rhs, "d");
grammar[4].lhs = 'B';
strcpy(grammar[4].rhs, "eS");
grammar[5].lhs = 'B';
strcpy(grammar[5].rhs, "f");
26
// Perform predictive parsing
predictive_parse(input);
return 0;
}
Output:
27
PRACTICAL 10
Program:
#include <stdio.h>
#include <string.h>
#define MAX_RULES 5
#define MAX_TERMINALS 4
#define MAX_NON_TERMINALS 3
#define MAX_INPUT_LEN 100
int input_index = 0;
char current_input;
// Parsing loop
while (top >= 0) {
current_input = input[input_index];
// If the top of the stack is a terminal or $ (end of input), compare it with the input
symbol
if (top_symbol == '$') {
if (current_input == '$') {
printf("Parsing successful!\n");
return;
} else {
printf("Error: Expected end of input, but found %c\n", current_input);
return;
}
} else if (top_symbol >= 'a' && top_symbol <= 'z') {
if (top_symbol == current_input) {
printf("Match: %c\n", current_input);
input_index++;
top--; // Pop from the stack
} else {
printf("Error: Expected %c but found %c\n", top_symbol, current_input);
return;
}
}
// If the top of the stack is a non-terminal, look it up in the parsing table
else if (top_symbol >= 'A' && top_symbol <= 'Z') {
int row = get_index(top_symbol); // Get the index for the non-terminal
int col = get_index(current_input); // Get the index for the terminal
if (col == -1) {
printf("Error: Invalid terminal '%c' in input\n", current_input);
return;
}
29
if (strlen(parse_table[row][col]) > 0) {
// Pop the top symbol and push the production rule
top--;
char* production = parse_table[row][col];
printf("Using production: %c -> %s\n", top_symbol, production);
// If all input is parsed successfully and we expect $ at the end, it should match
if (input[input_index] == '$') {
printf("Parsing successful!\n");
} else {
printf("Error: Input string not completely parsed\n");
}
}
/*
 * Loads the five productions S -> AB, A -> a, A -> ε, B -> b, B -> ε into the
 * grammar table, in that order, and exits.
 */
int main() {
    static const struct {
        char lhs;
        const char *rhs;
    } productions[] = {
        {'S', "AB"},
        {'A', "a"},
        {'A', "ε"},
        {'B', "b"},
        {'B', "ε"},
    };
    size_t k;
    for (k = 0; k < sizeof productions / sizeof productions[0]; k++) {
        grammar[k].lhs = productions[k].lhs;
        strcpy(grammar[k].rhs, productions[k].rhs);
    }
    return 0;
}
Output:
31
PRACTICAL 11
Program:
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
char input[100];
int current_position = 0; // Points to the current character in the input
/* Reads one whitespace-delimited expression into the global input buffer.
 * Returns 0 on success, 1 if nothing could be read. */
int main() {
    // Take input expression from the user
    printf("Enter the expression: ");
    // The width 99 leaves room for the terminator in the 100-byte buffer;
    // a bare %s would write past the end on longer input.
    if (scanf("%99s", input) != 1) {
        fprintf(stderr, "No expression could be read.\n");
        return 1;
    }
    return 0;
}
33
Output:
34
PRACTICAL 12
Program:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* One parse-stack entry: a state number paired with a symbol. */
typedef struct {
int state;   /* state number; the parsing loop reads it from the top entry */
char symbol; /* symbol stored together with that state */
} StackItem;
/* The parse stack, holding at most MAX_STACK entries. */
StackItem stack[MAX_STACK];
/* Index of the top entry of stack; starts at -1 (presumably meaning "empty" - confirm against push/pop). */
int top = -1;
/*
 * Goto table with 12 rows and 3 columns.
 * NOTE(review): presumably rows are parser states and columns are the grammar's
 * non-terminals, with -1 marking "no transition" - confirm against lookupGoto().
 */
int gotoTable[12][3] = {
{1, 2, 3}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1},
{8, 2, 3}, {-1, -1, -1}, {-1, 9, 3}, {-1, -1, 10},
{-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}
};
while (1) {
printStack();
state = stack[top].state;
symbol = input[ip];
actionCode = lookupAction(state, symbol);
printf("State: %d, Symbol: %c, Action: %d\n", state, symbol, actionCode);
if (actionCode == 100) {
printf("Input accepted!\n");
break;
} else if (actionCode > 0) {
printf("Shift: Push (%d, %c)\n", actionCode, symbol);
push(actionCode, symbol);
ip++;
} else if (actionCode < 0) {
int rule = -actionCode - 1;
printf("Reduce by rule %d: %c -> ...\n", rule + 1, lhs[rule]);
37
pop(rhs[rule]);
state = stack[top].state;
int gotoState = lookupGoto(state, lhs[rule]);
printf("Goto state %d after reduction\n", gotoState);
push(gotoState, lhs[rule]);
} else {
printf("Parsing error.\n");
break;
}
}
}
// Main function
/* Reads the string to parse from standard input.
 * Returns 0 on success, 1 if nothing could be read. */
int main() {
    char input[MAX_INPUT];
    printf("Enter the input string (end with $): ");
    /* fgets never writes more than sizeof input bytes, unlike an unbounded %s. */
    if (fgets(input, sizeof input, stdin) == NULL) {
        fprintf(stderr, "No input could be read.\n");
        return 1;
    }
    /* Keep only the first word, as %s did. Unlike %s, leading blanks are not
     * skipped, so a line that starts with a blank yields an empty string. */
    input[strcspn(input, " \t\r\n")] = '\0';
    return 0;
}
38
Output:
39
PRACTICAL 13
Program:
#include<stdio.h>
#include<string.h>
#include <stdlib.h>
/* Input string being parsed; allocated in main(), which also appends '$' to it. */
char *input;
/* Index of the next unread character of input; advanced by shift(). */
int i=0;
/* lasthandle: copy of the handle used by the most recent successful reduce().
   stack:      the parse stack, kept NUL-terminated by shift() and reduce().
   handles:    right-hand sides that reduce() replaces with 'E'. */
char lasthandle[6],stack[50],handles[][5]={")E(","E*E","E+E","i","E^E"};
// Handles are stored reversed because reduce() compares them from the stack top
// downwards, so (E) becomes )E( when pushed to stack
/* top: index of the top element of stack; l: length of input, set in main(). */
int top=0,l;
/*
 * Precedence relations, looked up as prec[row][column] with both indices
 * produced by getindex(): the row is the symbol on top of the stack, the column
 * is the next input symbol. main() only tests entries against '>'.
 * NOTE(review): 'e' presumably marks an error entry - confirm.
 */
char prec[9][9]={
/* stack \ input   +   -   *   /   ^   i   (   )   $  */
/* + */          {'>','>','<','<','<','<','<','>','>'},
/* - */          {'>','>','<','<','<','<','<','>','>'},
/* * */          {'>','>','>','>','<','<','<','>','>'},
/* / */          {'>','>','>','>','<','<','<','>','>'},
/* ^ */          {'>','>','>','>','<','<','<','>','>'},
/* i */          {'>','>','>','>','>','e','e','>','>'},
/* ( */          {'<','<','<','<','<','<','<','>','e'},
/* ) */          {'>','>','>','>','>','e','e','>','>'},
/* $ */          {'<','<','<','<','<','<','<','<','>'},
};
/*
 * Maps a grammar symbol to its row/column index in the prec table.
 * Returns 0..8 for + - * / ^ i ( ) $ respectively, and -1 for any other
 * character. Previously control fell off the end of the function for those;
 * callers must check for -1 before using the result as an array index.
 */
int getindex(char c)
{
    switch(c)
    {
        case '+':return 0;
        case '-':return 1;
        case '*':return 2;
        case '/':return 3;
        case '^':return 4;
        case 'i':return 5;
        case '(':return 6;
        case ')':return 7;
        case '$':return 8;
        default:return -1;
    }
}
/*
 * Pushes the next input character onto the parse stack, advances the input
 * index i, and keeps the stack NUL-terminated.
 * Always returns 1; the function is declared int, so it must return a value.
 * No bounds check is made: the caller has to keep top + 1 inside stack.
 */
int shift()
{
    stack[++top]=*(input+i++);
    stack[top+1]='\0';
    return 1;
}
/*
 * Tries to replace a handle on top of the parse stack with 'E'.
 * The five entries of handles are tried in order; each is compared character
 * by character from the stack top downwards. On the first match the handle is
 * popped, 'E' is stored in its place, lasthandle receives a copy of the handle
 * and the stack is re-terminated.
 * Returns 1 after a reduction, 0 when no handle matches.
 */
int reduce()
{
    int h;
    for(h=0;h<5;h++)
    {
        const char *pattern=handles[h];
        int plen=strlen(pattern);
        int k;
        /* Cheap rejection: first character differs or the stack is too short. */
        if(stack[top]!=pattern[0]||top+1<plen)
            continue;
        k=0;
        while(k<plen&&stack[top-k]==pattern[k])
            k++;
        if(k!=plen)
            continue;
        top=top-plen+1;
        stack[top]='E';
        strcpy(lasthandle,handles[h]);
        stack[top+1]='\0';
        return 1;
    }
    return 0;
}
/* Writes stack[0] through stack[top] to standard output, one character each. */
void dispstack()
{
    int pos=0;
    while(pos<=top)
    {
        putchar(stack[pos]);
        pos++;
    }
}
/* Writes the unread part of the input, positions i through l-1, to standard output. */
void dispinput()
{
    int pos=i;
    while(pos<l)
    {
        putchar(*(input+pos));
        pos++;
    }
}
/*
 * Operator-precedence parser driver.
 * Reads an expression, appends '$', then repeatedly shifts one symbol and,
 * whenever the precedence table says '>' for (stack top, next input symbol),
 * reduces for as long as reduce() succeeds. Each step is printed as a row of a
 * STACK / INPUT / ACTION trace. The input is accepted when the stack ends as "$E$".
 * Returns 0 after parsing, 1 if memory could not be allocated.
 */
int main(void)
{
    /* 47 symbols + the appended '$' + the terminator fit in 50 bytes. The same
       limit keeps the parse stack (initial '$', at most 48 shifted symbols and
       a terminator) inside stack[50]. */
    input=malloc(50);
    if(input==NULL)
    {
        fprintf(stderr,"\nOut of memory\n");
        return 1;
    }
    printf("\nEnter the string\n");
    if(scanf("%47s",input)!=1)
    {
        input[0]='\0';
    }
    strcat(input,"$");
    l=strlen(input);
    /* getindex() has no table index for other characters, so reject them here
       instead of indexing prec with an undefined value. */
    if(strspn(input,"+-*/^i()$")!=(size_t)l)
    {
        printf("\nNot Accepted;");
        free(input);
        return 0;
    }
    strcpy(stack,"$");
    printf("\nSTACK\t\t\t\t\tINPUT\t\t\t\t\tACTION");
    /* Stop once every symbol is shifted; running to i<=l pushed the terminator
       and read beyond the end of the input. */
    while(i<l)
    {
        shift();
        printf("\n");
        dispstack();
        printf("\t\t\t\t\t");
        dispinput();
        printf("\t\t\t\t\tShift");
        /* There is no lookahead after the final '$' has been shifted. */
        if(i<l&&prec[getindex(stack[top])][getindex(input[i])]=='>')
        {
            while(reduce())
            {
                printf("\n");
                dispstack();
                printf("\t\t\t\t\t");
                dispinput();
                printf("\t\t\t\t\tReduced: E->%s",lasthandle);
            }
        }
    }
    if(strcmp(stack,"$E$")==0)
    {
        printf("\nAccepted;");
    }
    else
    {
        printf("\nNot Accepted;");
    }
    free(input);
    return 0;
}
42
Output:
43
PRACTICAL 14
44
• Yacc also recognizes single characters as tokens. Therefore, assigned token numbers
should not overlap ASCII codes.
• The definition part can include C code external to the definition of the parser and
variable declarations, within %{ and %} in the first column.
• It can also include the specification of the starting symbol in the grammar:
%start nonterminal
Input File:
• If yylex() is not defined in the auxiliary routines sections, then it should be included:
#include "lex.yy.c"
• YACC input file generally finishes with: .y
Output Files:
• The output of YACC is a file named y.tab.c
• If it contains the main() definition, it must be compiled to be executable.
• Otherwise, the code can be an external function definition for the function int
yyparse()
• If called with the –d option in the command line, Yacc produces as output a header
file y.tab.h with all its specific definition (particularly important are token definitions
to be included, for example, in a Lex input file).
• If called with the –v option, Yacc produces as output a file y.output containing a
textual description of the LALR(1) parsing table used by the parser. This is useful for
tracking down how the parser solves conflicts.
Example: Yacc File (.y)
%{
#include <ctype.h>
#include <stdio.h>
#define YYSTYPE double /* double type for yacc stack */
/* Declared up front: the grammar actions and the generated parser call these
   before their definitions appear. */
int yylex(void);
void yyerror(char * s);
%}
%%
Lines : Lines S '\n' { printf("OK \n"); }
| S '\n'
| error '\n' {yyerror("Error: reenter last line:");
yyerrok; };
S : '(' S ')'
| '[' S ']'
| /* empty */ ;
%%
#include "lex.yy.c"
void yyerror(char * s)
/* yacc error handler: writes the message and a newline to stderr */
{
    fprintf (stderr, "%s\n", s);
}
int main(void)
{
    return yyparse();
}
Lex File (.l)
%{
/* No C declarations are needed here: every token returned is the matched character itself. */
%}
%%
[ \t] { /* skip blanks and tabs */ }
\n|. { return yytext[0]; }
%%
46
PRACTICAL 15
Program:
Lexical Analyzer Source Code:
%{
#include <stdlib.h> // atoi
#include "y.tab.h" // Include the header file generated by Bison
%}
%%
[0-9]+ { yylval = atoi(yytext); return NUMBER; }
[A-Za-z]+ { return ID; }
"+" { return '+'; }
"-" { return '-'; }
"*" { return '*'; }
"/" { return '/'; }
"(" { return '('; }
")" { return ')'; }
[ \t]+ { /* Skip blanks; unmatched text would otherwise be echoed by the default rule */ }
\n { return 0; /* End of line ends the expression */ }
%%
int yywrap() {
    return 1; // This is required to avoid "undefined reference to yywrap" error.
}
Parser Source Code:
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "y.tab.h" // Include the generated header by Bison
extern char *yytext; // Declare yytext, it is defined by Lex
// Placeholder lookup of an identifier's value: the name is not consulted and
// every identifier evaluates to 0.
int get_value(char *id) {
    (void)id;
    return 0;
}
%}
%token NUMBER ID
%left '+' '-'
%left '*' '/'
%%
// Start symbol
E:T{
printf("Result = %d\n", $$);
return 0;
}
47
// Term (handles expressions with higher precedence, multiplication/division)
T : T '+' T {
printf("Adding: %d + %d\n", $1, $3); // Debugging output
$$ = $1 + $3;
}
| T '-' T {
printf("Subtracting: %d - %d\n", $1, $3); // Debugging output
$$ = $1 - $3;
}
| T '*' T {
printf("Multiplying: %d * %d\n", $1, $3); // Debugging output
$$ = $1 * $3;
}
| T '/' T {
if ($3 == 0) {
printf("Error: Division by zero\n");
exit(1);
}
printf("Dividing: %d / %d\n", $1, $3); // Debugging output
$$ = $1 / $3;
}
49
PRACTICAL 16
Aim: Create Yacc and Lex specification files to generate a
calculator which accepts integer and float type arguments.
Program:
Lexical Analyzer Source Code:
%{
/* Definition section */
#include<stdio.h>
#include<stdlib.h> /* atoi */
#include "y.tab.h"
extern int yylval;
%}
/* Rule Section */
%%
[0-9]+ {
yylval=atoi(yytext);
return NUMBER;
}
[ \t] ; /* skip blanks as well as tabs, so that spaced input such as 1 + 2 is accepted */
[\n] return 0;
. return yytext[0];
%%
int yywrap()
{
    return 1;
}
%token NUMBER
%left '+' '-'
%left '*' '/' '%'
%left '(' ')'
/* Rule Section */
%%
ArithmeticExpression: E{
/* Print the value of E explicitly rather than reading $$ before it is assigned. */
printf("\nResult=%d\n", $1);
return 0;
};
E:E'+'E {$$=$1+$3;}
|E'-'E {$$=$1-$3;}
|E'*'E {$$=$1*$3;}
|E'/'E {
/* Integer division by zero is undefined; report it and stop parsing. */
if($3==0) { printf("\nError: division by zero\n\n"); flag=1; YYABORT; }
$$=$1/$3;
}
|E'%'E {
/* The same guard applies to the remainder operator. */
if($3==0) { printf("\nError: division by zero\n\n"); flag=1; YYABORT; }
$$=$1%$3;
}
|'('E')' {$$=$2;}
| NUMBER {$$=$1;}
;
%%
//driver code
void main()
{
printf("\nEnter Any Arithmetic Expression which can have operations Addition, Subtraction,
Multiplication, Division, Modulus and Round brackets:\n");
yyparse();
if(flag==0)
printf("\nEntered arithmetic expression is Valid\n\n");
}
/* Parser error hook: raises the error flag and tells the user the expression is invalid. */
void yyerror()
{
    flag=1;
    fputs("\nEntered arithmetic expression is Invalid\n\n",stdout);
}
Output:
51