0% found this document useful (0 votes)
24 views

tarun (1)

The document outlines various practical exercises related to lexical analysis in C programming. It includes the design of a lexical analyzer, identification of comments, validation of identifiers, and the use of tools like LEX and FLEX for generating lexical analyzers. Additionally, it covers programs for counting characters, words, vowels, consonants, and extracting numbers and HTML tags from files.

Uploaded by

Sumit Bhatt
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
24 views

tarun (1)

The document outlines various practical exercises related to lexical analysis in C programming. It includes the design of a lexical analyzer, identification of comments, validation of identifiers, and the use of tools like LEX and FLEX for generating lexical analyzers. Additionally, it covers programs for counting characters, words, vowels, consonants, and extracting numbers and HTML tags from files.

Uploaded by

Sumit Bhatt
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 51

PRACTICAL 1

Aim: Design a lexical analyzer for a given language. The lexical analyzer
should ignore redundant spaces, tabs and new lines. It should also ignore
comments. Although the syntax specification states that identifiers can be
arbitrarily long, you may restrict the length to some reasonable value.
Simulate the same in the C language.
Program:
#include <stdio.h>
#include <string.h>
#include <ctype.h>

#define MAX_IDENTIFIER_LEN 30
#define BUFFER_SIZE 1000

// Reserved words recognised by the analyzer.
const char *keywords[] = {"if", "else", "while", "for", "int", "float", "return", "void",
                          "main"};
// Derived from the table so the count can never drift from its contents.
const int keyword_count = sizeof keywords / sizeof keywords[0];

// Returns 1 when `word` is exactly equal to one of the entries of
// `keywords`, and 0 otherwise. The comparison is case-sensitive.
int is_keyword(const char *word) {
    for (int i = 0; i < keyword_count; i++) {
        if (strcmp(word, keywords[i]) == 0) {
            return 1;
        }
    }
    return 0;
}

// Consumes consecutive whitespace characters from `file`. On return the
// stream is positioned at the first non-whitespace character, or at end of
// file when only whitespace remained.
void skip_whitespace(FILE *file) {
    // int, not char: fgetc() reports EOF as an out-of-band int value, and a
    // char would confuse the byte 0xFF with EOF (or never equal EOF at all
    // where char is unsigned).
    int c;
    while ((c = fgetc(file)) != EOF) {
        if (!isspace(c)) {
            ungetc(c, file); // leave the first real character for the caller
            break;
        }
    }
}

// Skips the remainder of a comment. The caller has already consumed the
// leading '/'; the next character of `file` selects what happens:
//   '/'  line comment  - everything through the end of the line is skipped;
//   '*'  block comment - everything through the closing star-slash is skipped;
//   else               - not a comment, the character is pushed back.
void skip_comments(FILE *file) {
    int c = fgetc(file); // int so that EOF stays distinguishable from data

    if (c == '/') {
        while ((c = fgetc(file)) != EOF && c != '\n') {
            // discard the body of the line comment
        }
    } else if (c == '*') {
        // Remember the previous character instead of reading ahead, so that
        // a run such as "**/" still terminates the comment.
        int prev = 0;
        while ((c = fgetc(file)) != EOF) {
            if (prev == '*' && c == '/') {
                break;
            }
            prev = c;
        }
    } else if (c != EOF) {
        ungetc(c, file);
    }
}

// Reads one run of letters, digits and underscores from `file` and prints
// it as "Keyword: ..." when is_keyword() accepts it, otherwise as
// "Identifier: ...". Only the first MAX_IDENTIFIER_LEN characters are kept;
// the rest of an over-long run is consumed and dropped. The character that
// ends the run is pushed back onto the stream.
void process_token(FILE *file) {
    char buffer[MAX_IDENTIFIER_LEN + 1];
    int c; // int so that EOF is distinguishable and isalnum() gets a valid value
    int i = 0;

    while ((c = fgetc(file)) != EOF && (isalnum(c) || c == '_')) {
        if (i < MAX_IDENTIFIER_LEN) {
            buffer[i++] = (char)c;
        }
    }
    buffer[i] = '\0';
    if (c != EOF) {
        ungetc(c, file);
    }

    if (is_keyword(buffer)) {
        printf("Keyword: %s\n", buffer);
    } else if (i > 0) {
        printf("Identifier: %s\n", buffer);
    }
}

// Prints the single-character operator `c` on standard output in the form
// "Operator: <c>" followed by a newline.
void process_operator(char c) {
    fputs("Operator: ", stdout);
    putchar(c);
    putchar('\n');
}

// Tokenizes the text file `filename` and prints every keyword, identifier
// and operator found. Whitespace and both comment forms are skipped.
// Prints an error message and returns when the file cannot be opened.
void lexical_analyzer(const char *filename) {
    FILE *file = fopen(filename, "r");
    if (!file) {
        printf("Error opening file: %s\n", filename);
        return;
    }

    int c; // int: fgetc() returns EOF out of band
    while ((c = fgetc(file)) != EOF) {
        if (isspace(c)) {
            skip_whitespace(file);
        } else if (c == '/') {
            // One character of look-ahead decides between a comment and a
            // division operator.
            int next = fgetc(file);
            if (next == '/' || next == '*') {
                ungetc(next, file);
                skip_comments(file);
            } else {
                if (next != EOF) {
                    ungetc(next, file);
                }
                process_operator((char)c);
            }
        } else if (isalnum(c) || c == '_') {
            ungetc(c, file);
            process_token(file);
        } else if (c != '\0' && strchr("+-*/=<>!&|;", c)) {
            // The NUL check matters: strchr() would otherwise match the
            // terminator of the operator list.
            process_operator((char)c);
        }
    }

    fclose(file);
}

// Runs the lexical analyzer on the file named by the first command-line
// argument, or on the built-in default path when no argument is given.
int main(int argc, char **argv) {
    // Backslashes in the Windows path are escaped for the C string literal.
    const char *filename =
        "C:\\Users\\91740\\OneDrive\\Desktop\\Tarun\\LexPrograms\\text.txt";
    if (argc > 1) {
        filename = argv[1];
    }
    lexical_analyzer(filename);
    return 0;
}

3
Output:

4
PRACTICAL 2
Aim: Write a C program to identify whether a given line is a comment or
not.
Program:
#include <stdio.h>
#include <string.h>
// Reads one line from standard input and reports whether it is a
// single-line comment, a complete block comment on one line, an
// unterminated block comment, or not a comment at all.
int main(void) {
    char com[100]; // Allow longer input
    int closed = 0; // set once the closing star-slash has been found

    printf("Enter a line of code: ");
    if (fgets(com, sizeof(com), stdin) == NULL) {
        com[0] = '\0'; // no input: fall through to "not a comment"
    }

    // Remove trailing newline character if present
    size_t len = strlen(com);
    if (len > 0 && com[len - 1] == '\n') {
        com[len - 1] = '\0';
        len--;
    }

    if (com[0] == '/' && com[1] == '/') {
        printf("It is a single-line comment.\n");
    } else if (com[0] == '/' && com[1] == '*') {
        // Look for the terminator after the opening two characters.
        for (size_t i = 2; i + 1 < len; i++) {
            if (com[i] == '*' && com[i + 1] == '/') {
                closed = 1;
                break;
            }
        }
        if (closed) {
            printf("It is a multi-line comment.\n");
        } else {
            printf("It is not a complete multi-line comment.\n");
        }
    } else {
        printf("It is not a comment.\n");
    }
    return 0;
}

5
Output:

6
PRACTICAL 3

Aim: Write a C program to test whether a given identifier is valid or not.

Program:
#include <stdio.h>
#include <string.h>
#include <ctype.h>
// The C keywords; none of them may be used as an identifier.
const char *keywords[] = {
    "auto", "break", "case", "char", "const", "continue", "default", "do", "double",
    "else", "enum", "extern", "float", "for", "goto", "if", "inline", "int",
    "long", "register", "restrict", "return", "short", "signed", "sizeof",
    "static", "struct", "switch", "typedef", "union", "unsigned", "void",
    "volatile", "while", "_Alignas", "_Alignof", "_Atomic", "_Bool", "_Complex",
    "_Generic", "_Imaginary", "_Noreturn", "_Static_assert", "_Thread_local"
};
// Derived from the table so the count can never drift from its contents.
const int keyword_count = sizeof keywords / sizeof keywords[0];

// Returns 1 when `word` is exactly equal to one of the entries of
// `keywords`, and 0 otherwise.
int is_keyword(const char *word) {
    for (int i = 0; i < keyword_count; i++) {
        if (strcmp(word, keywords[i]) == 0) {
            return 1;
        }
    }
    return 0;
}
// Returns 1 when `identifier` is a valid identifier and 0 otherwise.
// It is valid when it is non-empty, starts with a letter or underscore,
// continues with letters, digits or underscores only, and is not rejected
// by is_keyword().
int is_valid_identifier(const char *identifier) {
    size_t length = strlen(identifier);
    if (length == 0) {
        return 0;
    }

    // <ctype.h> functions require a value representable as unsigned char;
    // passing a negative plain char is undefined behaviour.
    unsigned char first = (unsigned char)identifier[0];
    if (!isalpha(first) && first != '_') {
        return 0;
    }

    for (size_t i = 1; i < length; i++) {
        unsigned char ch = (unsigned char)identifier[i];
        if (!isalnum(ch) && ch != '_') {
            return 0;
        }
    }

    return is_keyword(identifier) ? 0 : 1;
}
// Reads one whitespace-delimited token from standard input and reports
// whether it is a valid identifier.
int main() {
    char identifier[100];

    printf("Enter an identifier: ");
    // The field width keeps the read inside the buffer; a failed read must
    // not leave `identifier` uninitialised.
    if (scanf("%99s", identifier) != 1) {
        printf("No identifier was entered.\n");
        return 1;
    }

    if (is_valid_identifier(identifier)) {
        printf("'%s' is a valid identifier.\n", identifier);
    } else {
        printf("'%s' is NOT a valid identifier.\n", identifier);
    }
    return 0;
}

Output:

8
PRACTICAL 4

Aim: Write a C program to simulate a lexical analyzer for validating


operators
Program:
#include <stdio.h>
#include <ctype.h>
#include <string.h>
// Function to check if a character is a valid operator
// Returns 1 when `ch` is one of the single-character operators
// + - * / % = < > ! & | ^ and 0 otherwise.
int isOperator(char ch) {
    static const char operators[] = "+-*/%=<>!&|^";
    // strchr() also matches the terminating NUL, so exclude it explicitly.
    return ch != '\0' && strchr(operators, ch) != NULL;
}
// Function to simulate lexical analysis for operators
// Walks over `input` and prints one "Operator found" line for every
// character that isOperator() accepts, in order of appearance.
void lexicalAnalyzer(char* input) {
    const char *end = input + strlen(input);
    for (const char *cursor = input; cursor < end; cursor++) {
        if (!isOperator(*cursor)) {
            continue;
        }
        printf("Operator found: %c\n", *cursor);
    }
}
// Reads one whitespace-delimited string from standard input and lists the
// operators it contains.
int main() {
    char input[100];
    // Input expression
    printf("Enter a string to check for operators: ");
    // The field width keeps the read inside the 100-byte buffer.
    if (scanf("%99s", input) != 1) {
        return 1;
    }
    // Run lexical analysis on the input string
    lexicalAnalyzer(input);
    return 0;
}

9
Output:

10
PRACTICAL 5
Aim: To Study about Lexical Analyzer Generator(LEX) and Flex(Fast
Lexical Analyzer)
Lexical analyzers are tools used to tokenize input streams into meaningful components
(tokens) such as keywords, identifiers, operators, and separators. LEX and FLEX are two
popular tools for generating lexical analyzers.

1. LEX (Lexical Analyzer Generator)


LEX is a tool used to generate a program that can recognize lexical patterns. It was
developed as part of the UNIX operating system for writing compilers and
interpreters.
How LEX Works:
1. Input Specification:
o A LEX file (.l) contains:
▪ Definitions: Define tokens and include headers.
▪ Rules: Patterns and associated actions (code to execute when a pattern
is matched).
▪ User Code: Additional helper functions or code
Example –
%{
/* Example LEX specification with three rules: a run of letters is printed
   as an identifier, a run of digits as a number, and any other single
   character matched by "." as a symbol. */
#include <stdio.h>
%}
%%
[a-zA-Z]+ printf("Identifier: %s\n", yytext);
[0-9]+ printf("Number: %s\n", yytext);
. printf("Symbol: %s\n", yytext);
%%
/* Entry point: runs the generated scanner once and exits. */
int main() {
yylex(); // Start scanning
return 0;
}
2.Lexical Analysis:
• The LEX tool generates a C program (lex.yy.c) based on the .l file.

• This file contains a yylex() function that processes the input and matches patterns.
3.Compilation and Execution:
• Compile the generated lex.yy.c:
lex file.l

11
cc lex.yy.c -o lexer -ll
./lexer

2. FLEX (Fast Lexical Analyzer)


FLEX is an enhanced and faster implementation of LEX. It is more commonly used due to its
performance and compatibility with modern systems.
Features of FLEX:
1. Faster execution and optimized code generation.
2. Compatible with LEX syntax, so .l files written for LEX can be processed by FLEX.
3. Supports extended regular expressions and offers more flexibility.
How FLEX Works:
FLEX works similarly to LEX:
1. Write a .l file containing patterns and actions.
2. Run FLEX to generate a C program (lex.yy.c).
3. Compile and run the resulting program to perform lexical analysis.
Example FLEX Program:
%{
/* Example FLEX specification: names the + and - operators, prints digit
   runs as numbers and letter runs as identifiers, drops blanks, tabs and
   newlines, and reports every other character as unknown. */
#include <stdio.h>
%}
%%
"+" { printf("Operator: PLUS\n"); }
"-" { printf("Operator: MINUS\n"); }
[0-9]+ { printf("Number: %s\n", yytext); }
[a-zA-Z]+ { printf("Identifier: %s\n", yytext); }
[ \t\n] ; // Ignore whitespace
. { printf("Unknown character: %s\n", yytext); }
%%
/* Entry point: runs the generated scanner once and exits. */
int main() {
yylex(); // Start scanning
return 0;
}
Steps to Compile and Execute:
1. Generate C code from .l file:
flex file.l
2. Compile the generated lex.yy.c:
gcc lex.yy.c -o lexer -lfl
3. Execute the program:
12
./lexer
Applications
1. Compiler Design: Tokenizing programming languages for parsing.
2. Interpreters: Processing scripts and commands.
3. Text Processing: Searching and transforming text using patterns.
4. Log Analysis: Extracting meaningful data from log files.

13
PRACTICAL 6(a)

Aim: Create a Lexer to take input from text file and count no of characters,
no. of lines & no. of words.

Program:
%{
/*
 * Counts the lines, words and characters of the file named on the command
 * line. A word is a maximal run of ASCII letters; every character read,
 * including each newline, is counted as a character.
 */
#include <stdio.h>
int num_lines = 0; /* number of newline characters seen */
int num_words = 0; /* number of alphabetic runs seen */
int num_chars = 0; /* number of characters seen, newlines included */
%}

%%

\n        { num_lines++; num_chars++; /* a newline is a character too */ }
[a-zA-Z]+ { num_words++; num_chars += yyleng; /* an alphabetic word */ }
[0-9]+    { num_chars += yyleng; /* digits count as characters only */ }
[ \t]+    { num_chars += yyleng; /* blanks and tabs */ }
.         { num_chars++; /* punctuation and anything else */ }

%%

/* Called at end of input; returning 1 means there is no further input. */
int yywrap() {
    return 1;
}

/* Scans the file given as the only argument and prints the three counts.
   Returns 1 on a usage error or when the file cannot be opened. */
int main(int argc, char **argv) {
    if (argc != 2) {
        printf("Usage: %s <filename>\n", argv[0]);
        return 1;
    }
    FILE *file = fopen(argv[1], "r");
    if (!file) {
        perror("File opening failed");
        return 1;
    }

    yyin = file; /* make the scanner read from the file */
    yylex();     /* run the scanner over the whole file */

    printf("Number of lines: %d\n", num_lines);
    printf("Number of words: %d\n", num_words);
    printf("Number of characters: %d\n", num_chars);
    fclose(file);
    return 0;
}
14
Output:

15
PRACTICAL 6(b)

Aim: Write a Lex program to count number of vowels and consonants in a


given input string.

Program:
%{
/* Counts the vowels and consonants (ASCII letters only) read from
   standard input until end of input. */
#include <stdio.h>
int num_vowels = 0;     /* number of vowels seen */
int num_consonants = 0; /* number of consonants seen */
%}
%%
[aeiouAEIOU]                     { num_vowels++; }
[b-df-hj-np-tv-zB-DF-HJ-NP-TV-Z] { num_consonants++; }
[^a-zA-Z]                        { /* ignore non-alphabetic characters */ }
%%
/* Defined here, as in the other programs of this set, so that the scanner
   links without the lex library. Returning 1 means no further input. */
int yywrap() {
    return 1;
}

int main() {
    printf("Enter a string: ");
    yylex(); /* scan until end of input */
    printf("Number of vowels: %d\n", num_vowels);
    printf("Number of consonants: %d\n", num_consonants);
    return 0;
}

Output:

16
PRACTICAL 7(a)

Aim: Write a Lex program to print out all numbers from the given file.
Program:
%{
/* Prints every number (integer or decimal) found in the file named on the
   command line, one per line. */
#include <stdio.h>
#include <stdlib.h>
%}

%%

[0-9]+(\.[0-9]+)? { printf("%s\n", yytext); /* integer or floating-point number */ }
[ \t\n]+          { /* ignore whitespace and newlines */ }
.                 { /* ignore every other character */ }

%%

/* Called at end of input; returning 1 means there is no further input. */
int yywrap() {
    return 1;
}

/* Scans the file given as the only argument. Returns 1 on a usage error
   or when the file cannot be opened. */
int main(int argc, char **argv) {
    if (argc != 2) {
        printf("Usage: %s <filename>\n", argv[0]);
        return 1;
    }

    FILE *file = fopen(argv[1], "r"); /* open the file passed as argument */
    if (!file) {
        perror("File opening failed");
        return 1;
    }

    yyin = file; /* make the scanner read from the file */

    printf("Reading file: %s\n", argv[1]); /* confirm which file is being read */
    yylex();

    fclose(file);
    return 0;
}

Output:
17
18
PRACTICAL 7(b)
Aim: Write a Lex program to print out all HTML tags in a file.
Program:
%{
/* Prints every HTML tag (the text from "<" through the next ">") found in
   the file named on the command line, one per line. */
#include <stdio.h>
%}

%%

"<"[^>]*">" { printf("%s\n", yytext); /* a tag: anything between < and > */ }
.|\n        { /* ignore everything else; "." alone does not match a newline,
                 which would otherwise be echoed by the default rule */ }

%%

/* Called at end of input; returning 1 means there is no further input. */
int yywrap() {
    return 1;
}

/* Scans the file given as the only argument. Returns 1 on a usage error
   or when the file cannot be opened. */
int main(int argc, char *argv[]) {
    if (argc != 2) {
        printf("Usage: %s <filename>\n", argv[0]);
        return 1;
    }

    FILE *file = fopen(argv[1], "r"); /* open the file passed as argument */
    if (!file) {
        perror("File opening failed");
        return 1;
    }

    yyin = file; /* make the scanner read from the file */
    yylex();

    fclose(file);
    return 0;
}

19
Output:

20
PRACTICAL 7(c)
Aim: Write a Lex program which adds line numbers to the given file and
display the same onto the standard output.
Program:
%{
/* Copies the file named on the command line to standard output with a
   right-aligned line number in front of every line. */
#include <stdio.h>
int line_number = 1; /* number given to the next line printed */
%}
%%
[^\n]*\n { printf("%10d %s", line_number++, yytext); /* a complete line */ }
[^\n]+   { printf("%10d %s\n", line_number++, yytext); /* last line without a trailing newline */ }
%%
/* Called at end of input; returning 1 means there is no further input. */
int yywrap() {
    return 1;
}

/* Scans the file given as the only argument. Returns 1 on a usage error
   or when the file cannot be opened. */
int main(int argc, char *argv[]) {
    if (argc != 2) {
        printf("Usage: %s <filename>\n", argv[0]);
        return 1;
    }
    /* Open the file passed as an argument */
    extern FILE *yyin;
    yyin = fopen(argv[1], "r");
    if (!yyin) {
        perror("File opening failed");
        return 1;
    }
    yylex();      /* process the whole file */
    fclose(yyin); /* close the file after processing */
    return 0;
}
Output:

21
PRACTICAL 8

Aim: Write a Lex program to count the number of comment lines in a


given C program. Also eliminate them and copy that program into separate
file.

Program:
%{
/*
 * Copies a C program from <input_file> to <output_file> with all comments
 * removed, and prints how many lines the removed comments occupied.
 * String and character literals are copied verbatim so that comment
 * markers inside them are not mistaken for comments.
 */
#include <stdio.h>

int comment_lines = 0; /* number of lines occupied by comments */
FILE *output_file;     /* destination of the comment-free program */
%}

%%
\"([^"\\\n]|\\(.|\n))*\"    { fputs(yytext, output_file); /* string literal */ }
'([^'\\\n]|\\(.|\n))+'      { fputs(yytext, output_file); /* character literal */ }
"//".*                      { comment_lines++; /* single-line comment */ }
"/*"([^*]|\*+[^*/])*\*+"/"  {
                                /* A block comment occupies one line more
                                   than the newlines it contains. */
                                comment_lines++;
                                for (int k = 0; k < yyleng; k++) {
                                    if (yytext[k] == '\n') {
                                        comment_lines++;
                                    }
                                }
                            }
\n                          { fputc('\n', output_file); /* keep the line structure */ }
.                           { fputc(yytext[0], output_file); /* ordinary code */ }
%%

/* Called at end of input; returning 1 means there is no further input. */
int yywrap() {
    return 1;
}

/* Usage: <program> <input_file> <output_file>. Returns 1 on a usage error
   or when either file cannot be opened. */
int main(int argc, char **argv) {
    if (argc != 3) {
        printf("Usage: %s <input_file> <output_file>\n", argv[0]);
        return 1;
    }

    /* Open input file for reading */
    FILE *input_file = fopen(argv[1], "r");
    if (!input_file) {
        perror("Error opening input file");
        return 1;
    }

    /* Open output file for writing */
    output_file = fopen(argv[2], "w");
    if (!output_file) {
        perror("Error opening output file");
        fclose(input_file); /* do not leak the already opened input */
        return 1;
    }

    yyin = input_file; /* make the scanner read from the input file */
    yylex();

    /* Print the count of comment lines */
    printf("Number of comment lines: %d\n", comment_lines);

    fclose(input_file);
    fclose(output_file);

    return 0;
}
Output:

Output.c

23
PRACTICAL 9

Aim: Write a C program for implementing the functionalities of predictive


parser for the mini language.

Program:
#include <stdio.h>
#include <string.h>

#define MAX_RULES 6
#define MAX_TERMINALS 6
#define MAX_NON_TERMINALS 3
#define MAX_INPUT_LEN 100

// Structure to represent a grammar rule
struct rule {
    char lhs;     // Left-hand side (non-terminal)
    char rhs[10]; // Right-hand side (production); large enough to hold the
                  // production rule plus its terminating NUL
};

// Global variables to store the grammar and parsing table.
// parse_table[row][col] holds the production to apply for the non-terminal
// with index `row` and the terminal with index `col`; an empty string means
// that there is no entry.
struct rule grammar[MAX_RULES];
char parse_table[MAX_NON_TERMINALS][MAX_TERMINALS][10]; // Parsing table

// Function to calculate the index of a non-terminal or terminal


// Maps a grammar symbol to its parse-table index. Non-terminals S, A, B
// map to rows 0..2 and terminals a..f map to columns 0..5; any other
// character yields -1.
int get_index(char c) {
    switch (c) {
    case 'S':
    case 'a':
        return 0;
    case 'A':
    case 'b':
        return 1;
    case 'B':
    case 'c':
        return 2;
    case 'd':
        return 3;
    case 'e':
        return 4;
    case 'f':
        return 5;
    default:
        return -1; // Invalid character
    }
}

// Function to build the parse table based on the grammar rules


// Fills parse_table with one entry per production. Rows are the
// non-terminals S, A, B (0..2) and columns the terminals a..f (0..5);
// every cell that is not written here stays empty.
void build_parse_table() {
    strcpy(parse_table[0][0], "aA"); // S -> aA  on 'a'
    strcpy(parse_table[0][1], "bB"); // S -> bB  on 'b'
    strcpy(parse_table[1][2], "cS"); // A -> cS  on 'c'
    strcpy(parse_table[1][3], "d");  // A -> d   on 'd'
    strcpy(parse_table[2][4], "eS"); // B -> eS  on 'e'
    strcpy(parse_table[2][5], "f");  // B -> f   on 'f'
}

// Function to perform predictive parsing


// Runs the table-driven predictive parser over `input`, printing each
// match and each production used. Prints "Parsing successful!" when the
// stack empties exactly at the end of the input, otherwise an error line.
void predictive_parse(char input[]) {
    char stack[MAX_INPUT_LEN];
    int top = -1;
    stack[++top] = 'S'; // Push the start symbol to the stack

    int input_index = 0;

    while (top >= 0) {
        char current_input = input[input_index];
        char top_symbol = stack[top];

        if (top_symbol >= 'a' && top_symbol <= 'z') {
            // Terminal on the stack: it must equal the current input symbol.
            if (top_symbol != current_input) {
                printf("Error: Expected %c but found %c\n", top_symbol, current_input);
                return;
            }
            printf("Match: %c\n", current_input);
            input_index++;
            top--; // Pop from the stack
        } else if (top_symbol >= 'A' && top_symbol <= 'Z') {
            // Non-terminal on the stack: consult the parsing table.
            int row = get_index(top_symbol);
            // get_index() also answers for the non-terminal letters, so an
            // upper-case input character must be rejected here rather than
            // being mistaken for a terminal column.
            int col = (current_input >= 'a' && current_input <= 'z')
                          ? get_index(current_input)
                          : -1;

            if (col == -1) {
                printf("Error: Invalid terminal '%c' in input\n", current_input);
                return;
            }

            const char *production = parse_table[row][col];
            int length = (int)strlen(production);
            if (length == 0) {
                printf("Error: No production rule for %c with input %c\n", top_symbol,
                       current_input);
                return;
            }

            // Pop the non-terminal and push its production in reverse, so
            // that the leftmost symbol ends up on top.
            top--;
            printf("Using production: %c -> %s\n", top_symbol, production);
            if (top + length >= MAX_INPUT_LEN) {
                printf("Error: Parse stack overflow\n");
                return;
            }
            for (int i = length - 1; i >= 0; i--) {
                stack[++top] = production[i];
            }
        } else {
            // Neither terminal nor non-terminal: stop instead of looping forever.
            printf("Error: Unexpected symbol on the parse stack\n");
            return;
        }
    }

    if (input[input_index] == '\0') {
        printf("Parsing successful!\n");
    } else {
        printf("Error: Input string not completely parsed\n");
    }
}

// Sets up the grammar and the parsing table, reads one string from
// standard input and parses it.
int main() {
    // Initialize the grammar
    grammar[0].lhs = 'S';
    strcpy(grammar[0].rhs, "aA");

    grammar[1].lhs = 'S';
    strcpy(grammar[1].rhs, "bB");

    grammar[2].lhs = 'A';
    strcpy(grammar[2].rhs, "cS");

    grammar[3].lhs = 'A';
    strcpy(grammar[3].rhs, "d");

    grammar[4].lhs = 'B';
    strcpy(grammar[4].rhs, "eS");

    grammar[5].lhs = 'B';
    strcpy(grammar[5].rhs, "f");

    // Build the parsing table
    build_parse_table();

    // Input string to parse; the field width keeps the read inside the
    // MAX_INPUT_LEN (100) byte buffer.
    char input[MAX_INPUT_LEN];
    printf("Enter the string to parse: ");
    if (scanf("%99s", input) != 1) {
        return 1;
    }

    // Perform predictive parsing
    predictive_parse(input);

    return 0;
}

Output:

27
PRACTICAL 10

Aim: Write a C program for constructing of LL (1) parsing.

Program:
#include <stdio.h>
#include <string.h>

#define MAX_RULES 5
#define MAX_TERMINALS 4
#define MAX_NON_TERMINALS 3
#define MAX_INPUT_LEN 100

// Structure to represent a grammar rule
struct rule {
    char lhs;     // Left-hand side (non-terminal)
    char rhs[10]; // Right-hand side (production); large enough to hold the
                  // production rule plus its terminating NUL
};

// Global variables to store the grammar and parsing table.
// parse_table[row][col] holds the production to apply for the non-terminal
// with index `row` and the terminal with index `col`; an empty string means
// that there is no entry.
struct rule grammar[MAX_RULES];
char parse_table[MAX_NON_TERMINALS][MAX_TERMINALS][10]; // Parsing table

// Function to calculate the index of a non-terminal or terminal


// Maps a grammar symbol to its parse-table index. Non-terminals S, A, B
// map to rows 0..2; terminals a, b and the end marker '$' map to columns
// 0..2. Any other character yields -1.
int get_index(char c) {
    switch (c) {
    case 'S':
    case 'a':
        return 0;
    case 'A':
    case 'b':
        return 1;
    case 'B':
    case '$': // End of input symbol
        return 2;
    default:
        return -1; // Invalid character
    }
}

// Function to build the parse table based on the grammar rules


// Fills the LL(1) table for the grammar
//   S -> AB      A -> a | ε      B -> b | ε
// Rows are S, A, B (0..2); columns are a, b, $ (0..2).
// FIRST(S) = {a, b, ε} and FOLLOW(S) = {$}, so S -> AB is entered under
// a, b and $. FOLLOW(A) = {b, $} and FOLLOW(B) = {$} decide where the
// ε-productions go.
void build_parse_table() {
    // S -> AB
    strcpy(parse_table[0][0], "AB");
    strcpy(parse_table[0][1], "AB");
    strcpy(parse_table[0][2], "AB");
    // A -> a
    strcpy(parse_table[1][0], "a");
    // A -> ε  (on every symbol of FOLLOW(A))
    strcpy(parse_table[1][1], "ε");
    strcpy(parse_table[1][2], "ε");
    // B -> b
    strcpy(parse_table[2][1], "b");
    // B -> ε  (on every symbol of FOLLOW(B))
    strcpy(parse_table[2][2], "ε");
}

// Function to perform LL(1) parsing


void ll1_parse(char input[]) {
char stack[MAX_INPUT_LEN];
int top = -1;
stack[++top] = '$'; // End of input symbol
stack[++top] = 'S'; // Start symbol

int input_index = 0;
char current_input;

// Parsing loop
while (top >= 0) {
current_input = input[input_index];

char top_symbol = stack[top];

// If the top of the stack is a terminal or $ (end of input), compare it with the input
symbol
if (top_symbol == '$') {
if (current_input == '$') {
printf("Parsing successful!\n");
return;
} else {
printf("Error: Expected end of input, but found %c\n", current_input);
return;
}
} else if (top_symbol >= 'a' && top_symbol <= 'z') {
if (top_symbol == current_input) {
printf("Match: %c\n", current_input);
input_index++;
top--; // Pop from the stack
} else {
printf("Error: Expected %c but found %c\n", top_symbol, current_input);
return;
}
}
// If the top of the stack is a non-terminal, look it up in the parsing table
else if (top_symbol >= 'A' && top_symbol <= 'Z') {
int row = get_index(top_symbol); // Get the index for the non-terminal
int col = get_index(current_input); // Get the index for the terminal

if (col == -1) {
printf("Error: Invalid terminal '%c' in input\n", current_input);
return;
}

29
if (strlen(parse_table[row][col]) > 0) {
// Pop the top symbol and push the production rule
top--;
char* production = parse_table[row][col];
printf("Using production: %c -> %s\n", top_symbol, production);

// Push the production to the stack (reverse the order)


for (int i = strlen(production) - 1; i >= 0; i--) {
stack[++top] = production[i];
}
} else {
printf("Error: No production rule for %c with input %c\n", top_symbol,
current_input);
return;
}
}
}

// If all input is parsed successfully and we expect $ at the end, it should match
if (input[input_index] == '$') {
printf("Parsing successful!\n");
} else {
printf("Error: Input string not completely parsed\n");
}
}

// Sets up the grammar and the parsing table, reads one string from
// standard input, appends the '$' end marker and parses it.
int main() {
    // Initialize the grammar
    grammar[0].lhs = 'S';
    strcpy(grammar[0].rhs, "AB");

    grammar[1].lhs = 'A';
    strcpy(grammar[1].rhs, "a");

    grammar[2].lhs = 'A';
    strcpy(grammar[2].rhs, "ε");

    grammar[3].lhs = 'B';
    strcpy(grammar[3].rhs, "b");

    grammar[4].lhs = 'B';
    strcpy(grammar[4].rhs, "ε");

    // Build the parsing table
    build_parse_table();

    // Input string to parse. The field width leaves room in the
    // MAX_INPUT_LEN (100) byte buffer for the '$' appended below and for
    // the terminating NUL.
    char input[MAX_INPUT_LEN];
    printf("Enter the string to parse: ");
    if (scanf("%98s", input) != 1) {
        input[0] = '\0'; // no input: parse the empty string
    }

    // Append '$' to the input to mark the end of the input
    strcat(input, "$");

    // Perform LL(1) parsing
    ll1_parse(input);

    return 0;
}
Output:

31
PRACTICAL 11

Aim: Write a C program for constructing recursive descent parsing.

Program:
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>

// Expression being parsed; filled by main() before parsing starts.
char input[100];
int current_position = 0; // Points to the current character in the input

// Function prototypes for recursive descent parsing.
// One function per non-terminal of the grammar
//   E  -> T E'        E' -> + T E' | ε
//   T  -> F T'        T' -> * F T' | ε
//   F  -> ( E ) | id


void E();
void E_prime();
void T();
void T_prime();
void F();

// Function to check if the current character matches the expected character


// Consumes the current input character when it equals `expected`;
// otherwise reports the mismatch and terminates the program with status 1.
void match(char expected) {
    char actual = input[current_position];
    if (actual != expected) {
        printf("Error: Expected '%c' but found '%c'.\n", expected, actual);
        exit(1);
    }
    current_position++;
}

// Function to handle E -> T E'


// Parses the production E -> T E' by calling T() and then E_prime().
void E() {
T(); // Match T
E_prime(); // Match E'
}

// Function to handle E' -> + T E' | ε


// Parses E' -> + T E' | ε. The tail recursion of the production is written
// as a loop: each pass consumes one "+ T" pair, and the loop ends (the ε
// case) as soon as the current character is not '+'.
void E_prime() {
    while (input[current_position] == '+') {
        match('+'); // Match '+'
        T();        // Match T
    }
}

// Function to handle T -> F T'


// Parses the production T -> F T' by calling F() and then T_prime().
void T() {
    F();       // Match F
    T_prime(); // Match T'
}

// Function to handle T' -> * F T' | ε


// Parses T' -> * F T' | ε. The tail recursion of the production is written
// as a loop: each pass consumes one "* F" pair, and the loop ends (the ε
// case) as soon as the current character is not '*'.
void T_prime() {
    while (input[current_position] == '*') {
        match('*'); // Match '*'
        F();        // Match F
    }
}

// Function to handle F -> ( E ) | id


// Parses F -> ( E ) | id, where an identifier is a single letter.
// Reports an error and terminates the program with status 1 on any other
// character.
void F() {
    char current = input[current_position];

    if (current == '(') {
        match('('); // Match '('
        E();        // Match E
        match(')'); // Match ')'
    } else if (isalpha((unsigned char)current)) {
        // The cast is required: passing a negative plain char to isalpha()
        // is undefined behaviour.
        match(current); // Match the identifier (id)
    } else {
        printf("Error: Unexpected symbol '%c'.\n", current);
        exit(1);
    }
}

// Reads one expression from standard input, parses it starting from E and
// reports whether the whole input was consumed.
int main() {
    // Take input expression from the user; the field width keeps the read
    // inside the 100-byte global buffer.
    printf("Enter the expression: ");
    if (scanf("%99s", input) != 1) {
        input[0] = '\0'; // no input: parse the empty string
    }

    // Parse the expression
    E(); // Start parsing from the start symbol 'E'

    // Check if the entire input was consumed
    if (input[current_position] == '\0') {
        printf("Parsing successful!\n");
    } else {
        printf("Error: Input not completely parsed.\n");
    }

    return 0;
}

33
Output:

34
PRACTICAL 12

Aim: Write a C program to implement LALR parsing.

Program:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Capacity of the parse stack, in items.
#define MAX_STACK 100


// Size of the input buffers, in bytes.
#define MAX_INPUT 100

// One parse-stack entry: a parser state and the grammar symbol pushed with it.
typedef struct {
int state;
char symbol;
} StackItem;

// The parse stack; `top` is the index of the topmost item, -1 when empty.
StackItem stack[MAX_STACK];
int top = -1;

// Push an item onto the stack


void push(int state, char symbol) {
if (top >= MAX_STACK - 1) {
printf("Stack overflow.\n");
exit(1);
}
stack[++top].state = state;
stack[top].symbol = symbol;
}

// Pop n items from the stack


// Removes the `n` topmost items from the parse stack. Terminates the
// program with status 1 when fewer than `n` items are on the stack.
void pop(int n) {
    int available = top + 1;
    if (available < n) {
        printf("Stack underflow: Attempted to pop %d items with only %d on stack.\n", n,
               available);
        exit(1);
    }
    top = top - n;
}

// Print the current state of the stack


// Prints the parse stack from bottom to top as "(state, symbol)" pairs on
// one line.
void printStack() {
    printf("Stack: ");
    for (int i = 0; i <= top; i++) {
        printf("(%d, %c) ", stack[i].state, stack[i].symbol);
    }
    printf("\n");
}

// Grammar rules (LHS and RHS)


// Grammar rules, numbered 1..6 (array index = rule number - 1):
//   1: E -> E + T    2: E -> T
//   3: T -> T * F    4: T -> F
//   5: F -> ( E )    6: F -> id
// lhs[] is the left-hand side of each rule and rhs[] the number of symbols
// on its right-hand side, i.e. how many stack items a reduction pops.
int lhs[6] = {'E', 'E', 'T', 'T', 'F', 'F'};
int rhs[6] = {3, 1, 3, 1, 3, 1};

// Action table, indexed by [state][terminal] with the terminal columns
// id + * ( ) $. Encoding of an entry:
//    s > 0  shift and go to state s
//   -k < 0  reduce by rule k
//   100     accept
//     0     error
// Error is 0 rather than -1 because -1 already means "reduce by rule 1".
int action[12][6] = {
    {5,  0,  0, 4,  0,   0}, // State 0
    {0,  6,  0, 0,  0, 100}, // State 1 (accept state)
    {0, -2,  7, 0, -2,  -2}, // State 2
    {0, -4, -4, 0, -4,  -4}, // State 3
    {5,  0,  0, 4,  0,   0}, // State 4
    {0, -6, -6, 0, -6,  -6}, // State 5
    {5,  0,  0, 4,  0,   0}, // State 6
    {5,  0,  0, 4,  0,   0}, // State 7
    {0,  6,  0, 0, 11,   0}, // State 8:  ')' shifts to state 11
    {0, -1,  7, 0, -1,  -1}, // State 9:  E -> E + T is complete
    {0, -3, -3, 0, -3,  -3}, // State 10: T -> T * F is complete
    {0, -5, -5, 0, -5,  -5}  // State 11: F -> ( E ) is complete
};

// Goto table, indexed by [state][non-terminal] with the columns E T F.
// -1 marks "no transition".
int gotoTable[12][3] = {
    {1, 2, 3}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1},
    {8, 2, 3}, {-1, -1, -1}, {-1, 9, 3}, {-1, -1, 10},
    {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}
};

// Returns the action-table entry for `state` and the terminal `symbol`
// ('i' stands for id). Returns 0 (error) for a symbol that is not a
// terminal of the grammar or for a state outside the table.
int lookupAction(int state, char symbol) {
    int index;
    switch (symbol) {
        case 'i': index = 0; break; // id
        case '+': index = 1; break;
        case '*': index = 2; break;
        case '(': index = 3; break;
        case ')': index = 4; break;
        case '$': index = 5; break;
        default: return 0;
    }
    if (state < 0 || state >= 12) {
        return 0;
    }
    return action[state][index];
}

// Returns the goto-table entry for `state` and the non-terminal `symbol`,
// or -1 when there is no transition, when `symbol` is not a non-terminal,
// or when `state` is outside the table.
int lookupGoto(int state, char symbol) {
    int index;
    switch (symbol) {
        case 'E': index = 0; break;
        case 'T': index = 1; break;
        case 'F': index = 2; break;
        default: return -1;
    }
    if (state < 0 || state >= 12) {
        return -1;
    }
    return gotoTable[state][index];
}

// Preprocess input by replacing "id" with 'i'


// Rewrites `input` in place so that every occurrence of "id" becomes the
// single character 'i'; all other characters are kept unchanged.
// The result is never longer than the original, so the rewrite needs no
// temporary buffer and works for strings of any length.
void preprocessInput(char *input) {
    int out = 0; // write position; never ahead of the read position
    for (int in = 0; input[in] != '\0'; in++) {
        if (input[in] == 'i' && input[in + 1] == 'd') {
            input[out++] = 'i'; // Replace "id" with 'i'
            in++;               // skip the 'd'
        } else {
            input[out++] = input[in];
        }
    }
    input[out] = '\0';
}

// Parse the input string using the LR parser


void parseInput(const char *input) {
char symbol;
int state, actionCode;
int ip = 0;

push(0, '$'); // Push initial state 0 and '$' symbol


printf("Parsing: %s\n", input);

while (1) {
printStack();
state = stack[top].state;
symbol = input[ip];
actionCode = lookupAction(state, symbol);
printf("State: %d, Symbol: %c, Action: %d\n", state, symbol, actionCode);
if (actionCode == 100) {
printf("Input accepted!\n");
break;
} else if (actionCode > 0) {
printf("Shift: Push (%d, %c)\n", actionCode, symbol);
push(actionCode, symbol);
ip++;
} else if (actionCode < 0) {
int rule = -actionCode - 1;
printf("Reduce by rule %d: %c -> ...\n", rule + 1, lhs[rule]);
37
pop(rhs[rule]);
state = stack[top].state;
int gotoState = lookupGoto(state, lhs[rule]);
printf("Goto state %d after reduction\n", gotoState);
push(gotoState, lhs[rule]);
} else {
printf("Parsing error.\n");
break;
}
}
}

// Main function
// Reads one input string (terminated by '$') from standard input,
// replaces "id" by 'i' and parses the result.
int main() {
    char input[MAX_INPUT];
    printf("Enter the input string (end with $): ");
    // The field width keeps the read inside the MAX_INPUT (100) byte buffer.
    if (scanf("%99s", input) != 1) {
        return 1;
    }

    preprocessInput(input); // Replace "id" with "i"

    parseInput(input); // Parse the input string

    return 0;
}

38
Output:

39
PRACTICAL 13

Aim: Write a C program to implement operator precedence parsing.

Program:
#include<stdio.h>
#include<string.h>
#include <stdlib.h>
// Input string; main() allocates it and appends the '$' end marker.
char *input;
// Index of the next input character to be shifted.
int i=0;
// lasthandle: copy of the handle used by the most recent reduction.
// stack:      parse stack, kept NUL-terminated so it can be printed and compared.
// handles:    right-hand sides that reduce to E, stored reversed because
//             reduce() compares them against the stack from the top down.
// NOTE(review): there are no handles for E-E and E/E although the precedence
// table has rows for '-' and '/' - confirm whether that is intended.
char lasthandle[6],stack[50],handles[][5]={")E(","E*E","E+E","i","E^E"};
//(E) becomes )E( when pushed to stack

// top: index of the topmost stack element; l: length of input including '$'.
int top=0,l;
// Precedence relations, indexed as prec[stack symbol][input symbol] in the
// order given by getindex(). main() reduces when the entry is '>'.
// The 'e' entries presumably mark error combinations - TODO confirm.
char prec[9][9]={

/*input*/
/*stack + - * / ^ i ( ) $ */
/* + */ '>', '>','<','<','<','<','<','>','>',
/* - */ '>', '>','<','<','<','<','<','>','>',
/* * */ '>', '>','>','>','<','<','<','>','>',
/* / */ '>', '>','>','>','<','<','<','>','>',
/* ^ */ '>', '>','>','>','<','<','<','>','>',
/* i */ '>', '>','>','>','>','e','e','>','>',
/* ( */ '<', '<','<','<','<','<','<','>','e',
/* ) */ '>', '>','>','>','>','e','e','>','>',
/* $ */ '<', '<','<','<','<','<','<','<','>',
};

// Maps a terminal to its row/column in the precedence table.
// Returns -1 for a character that is not a terminal of the grammar;
// callers must not use that value as an index.
int getindex(char c)
{
    switch(c)
    {
        case '+':return 0;
        case '-':return 1;
        case '*':return 2;
        case '/':return 3;
        case '^':return 4;
        case 'i':return 5;
        case '(':return 6;
        case ')':return 7;
        case '$':return 8;
        // Without a default the function would fall off its end, and using
        // the result is then undefined behaviour.
        default:return -1;
    }
}
// Shifts the next input character onto the stack, advances the input
// index and keeps the stack NUL-terminated. Always returns 1.
int shift()
{
    stack[++top]=*(input+i++);
    stack[top+1]='\0';
    return 1; // the function is declared int, so it must return a value
}
// Tries to replace a handle on top of the stack by 'E'. The handles are
// stored reversed, so each one is compared against the stack from the top
// downwards. On success the handle is copied to `lasthandle` and 1 is
// returned; 0 is returned when no handle matches.
int reduce()
{
    const int handle_count = (int)(sizeof handles / sizeof handles[0]);

    for (int h = 0; h < handle_count; h++)
    {
        int len = (int)strlen(handles[h]);

        // Quick rejects: the top symbol differs, or the stack is too short.
        if (stack[top] != handles[h][0] || top + 1 < len)
            continue;

        int matched = 0;
        while (matched < len && stack[top - matched] == handles[h][matched])
            matched++;

        if (matched == len)
        {
            top = top - len + 1; // position of the lowest handle symbol
            stack[top] = 'E';
            strcpy(lasthandle, handles[h]);
            stack[top + 1] = '\0';
            return 1; // successful reduction
        }
    }
    return 0;
}
// Prints the stack contents from the bottom up to and including the top.
void dispstack()
{
    int pos = 0;
    while (pos <= top)
    {
        putchar(stack[pos]);
        pos++;
    }
}
// Prints the part of the input that has not been shifted yet.
void dispinput()
{
    int pos = i;
    while (pos < l)
    {
        putchar(input[pos]);
        pos++;
    }
}
// Reads an expression, appends the '$' end marker and runs the
// shift-reduce loop, printing the stack, the remaining input and the
// action after every step. Prints "Accepted;" when the stack ends as
// "$E$", otherwise "Not Accepted;".
int main(void)
{
    input = malloc(50);
    if (input == NULL)
    {
        printf("\nOut of memory\n");
        return 1;
    }

    printf("\nEnter the string\n");
    // At most 47 characters: with the appended '$' that is 48 shifted
    // symbols on top of the initial '$', which together with the
    // terminating NUL exactly fills the 50-byte stack.
    if (scanf("%47s", input) != 1)
    {
        free(input);
        return 1;
    }
    strcat(input, "$");
    l = (int)strlen(input);
    strcpy(stack, "$");

    printf("\nSTACK\t\t\t\t\tINPUT\t\t\t\t\tACTION");
    // i < l, not i <= l: the string terminator must never be shifted.
    while (i < l)
    {
        shift();
        printf("\n");
        dispstack();
        printf("\t\t\t\t\t");
        dispinput();
        printf("\t\t\t\t\tShift");

        if (i >= l)
            break; // no look-ahead symbol left

        int row = getindex(stack[top]);
        int col = getindex(input[i]);
        if (row < 0 || col < 0)
            break; // symbol outside the grammar: falls through to "Not Accepted"

        if (prec[row][col] == '>')
        {
            while (reduce())
            {
                printf("\n");
                dispstack();
                printf("\t\t\t\t\t");
                dispinput();
                printf("\t\t\t\t\tReduced: E->%s", lasthandle);
            }
        }
    }

    if (strcmp(stack, "$E$") == 0)
        printf("\nAccepted;");
    else
        printf("\nNot Accepted;");

    free(input);
    input = NULL;
    return 0;
}

42
Output:

43
PRACTICAL 14

Aim: To Study about Yet Another Compiler-Compiler(YACC).


YACC is an LALR parser generator developed at the beginning of the 1970s by Stephen C.
Johnson for the Unix operating system. It automatically generates the LALR(1) parsers from
formal grammar specifications. YACC plays an important role in compiler and interpreter
development since it provides a means to specify the grammar of a language and to produce
parsers that either interpret or compile code written in that language.
Key Concepts and Features of YACC
• Grammar Specification: The input to YACC is a context-free grammar (usually in
the Backus-Naur Form, BNF) that describes the syntax rules of the language it parses.
• Parser Generation: YACC translates the grammar into a C function that could
perform an efficient parsing of input text according to such predefined rules.
• LALR(1) Parsing: This is a bottom-up parsing method that makes use of a single
token lookahead in determining the next action of parsing.
• Semantic Actions: These are the grammar productions that are associated with an
action; this enables the execution of code, usually in C, used in the construction
of abstract syntax trees, the generation of intermediate representations, or error
handling.
• Attribute Grammars: These grammars consist of non-terminal grammar symbols
with attributes, which through semantic actions are used in the construction of parse
trees or the output of code.
• Integration with Lex: YACC is often used along with Lex, a tool that generates lexical
analyzers (scanners), which break the input into tokens that are then processed by the
YACC parser.
A parser generator is a program that takes as input a specification of a syntax and produces as
output a procedure for recognizing that language. Historically, they are also called compiler
compilers. YACC (yet another compiler-compiler) is an LALR(1) parser generator: LookAhead,
Left-to-right scan, Rightmost derivation (produced in reverse), with 1 token of lookahead.
YACC was originally designed to be complemented by Lex.
Input File: YACC input file is divided into three parts.
/* definitions */
....
%%
/* rules */
....
%%
/* auxiliary routines */
....
Input File: Definition Part:
• The definition part includes information about the tokens used in the syntax
definition:
%token NUMBER
%token ID
• Yacc automatically assigns numbers for tokens, but it can be overridden by
%token NUMBER 621

44
• Yacc also recognizes single characters as tokens. Therefore, assigned token numbers
should not overlap ASCII codes.
• The definition part can include C code external to the definition of the parser and
variable declarations, within %{ and %} in the first column.
• It can also include the specification of the starting symbol in the grammar:
%start nonterminal
Input File:
• If yylex() is not defined in the auxiliary routines sections, then it should be included:
#include "lex.yy.c"
• The name of a YACC input file generally ends with the extension .y
Output Files:
• The output of YACC is a file named y.tab.c
• If it contains the main() definition, it must be compiled to be executable.
• Otherwise, the code can be an external function definition for the function int
yyparse()
• If called with the -d option in the command line, Yacc produces as output a header
file y.tab.h with all its specific definitions (particularly important are the token
definitions, to be included, for example, in a Lex input file).
• If called with the –v option, Yacc produces as output a file y.output containing a
textual description of the LALR(1) parsing table used by the parser. This is useful for
tracking down how the parser solves conflicts.
Example: Yacc File (.y)
%{
/* Prologue: C declarations copied verbatim into the generated parser. */
#include <ctype.h>
#include <stdio.h>
#define YYSTYPE double /* double type for yacc stack */

/* Declared up front: the generated parser calls yylex(), and the error rule
   below calls yyerror(), before either definition has been seen. */
int yylex(void);
void yyerror(char *s);
%}
%%
/* Lines: a sequence of newline-terminated bracket strings. After a syntax
   error the input is skipped up to the next newline and parsing resumes. */
Lines : Lines S '\n' { printf("OK \n"); }
      | S '\n'
      | error '\n' { yyerror("Error: reenter last line:");
                     yyerrok; }
      ;
/* S: properly nested pairs of () and [], or nothing at all. */
S : '(' S ')'
  | '[' S ']'
  | /* empty */
  ;
%%
#include "lex.yy.c"
/* yacc error handler: writes the given message, followed by a newline, to the
   standard error stream. */
void yyerror(char * s)
{
    FILE *out = stderr;

    fprintf(out, "%s\n", s);
}
/* Program entry point: calls yyparse() once and uses its return value as the
   process exit status. */
int main(void)
{
    int status = yyparse();

    return status;
}
Lex File (.l)
%{
/* No C declarations are needed here: every token is returned as its own
   character code, so no token header has to be included. */
%}
%%
[ \t] { /* skip blanks and tabs */ }
\n|. { return yytext[0]; /* newline and every other character is its own token */ }
%%

46
PRACTICAL 15

Aim: Create Yacc and Lex specification files to recognize arithmetic


expressions involving +, -, * and /.

Program:
Lexical Analyzer Source Code:
%{
/* Scanner for arithmetic expressions: produces NUMBER and ID tokens plus the
   single-character operator and parenthesis tokens used by the parser. */
#include <stdlib.h> /* atoi */
#include "y.tab.h" // Include the header file generated by Bison
%}
%%
[0-9]+ { yylval = atoi(yytext); return NUMBER; /* token value is the integer */ }
[A-Za-z]+ { return ID; }
"+" { return '+'; }
"-" { return '-'; }
"*" { return '*'; }
"/" { return '/'; }
"(" { return '('; }
")" { return ')'; }
[ \t]+ { /* skip blanks; without this rule they are echoed to stdout */ }
\n { return 0; /* a newline ends the expression: 0 is the end-of-input token */ }
. { return yytext[0]; /* pass unknown characters on so the parser reports them */ }
%%
/* Called by the scanner at end of input; returning 1 means there is no
   further input to read. Defining it here also avoids an
   "undefined reference to yywrap" link error. */
int yywrap() {
    return 1;
}
Parser Source Code:
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "y.tab.h" // Include the generated header by Bison
extern char *yytext; // Declare yytext, it is defined by Lex

/* Declared up front: the generated parser calls both functions before their
   definitions have been seen, which is an error in C99 and later. */
int yylex(void);
int yyerror(char *s);

/* Look up the value of an identifier.
   Placeholder: there is no symbol table, so every identifier evaluates to 0.
   id - the identifier's text (currently ignored). */
int get_value(char *id) {
    (void)id; /* unused until a real lookup exists */
    return 0;
}
%}

%token NUMBER ID
%left '+' '-'
%left '*' '/'

%%
/* Start symbol: a complete expression. Prints its value and stops parsing. */
E : T {
        /* $1 rather than $$: $$ has not been assigned in this action. */
        printf("Result = %d\n", $1);
        YYACCEPT;
    }
  ;

/* T: an expression. Operator precedence and left associativity come from the
   %left declarations above, not from the shape of these rules. */
T : T '+' T {
        printf("Adding: %d + %d\n", $1, $3); /* Debugging output */
        $$ = $1 + $3;
    }
  | T '-' T {
        printf("Subtracting: %d - %d\n", $1, $3); /* Debugging output */
        $$ = $1 - $3;
    }
  | T '*' T {
        printf("Multiplying: %d * %d\n", $1, $3); /* Debugging output */
        $$ = $1 * $3;
    }
  | T '/' T {
        /* Integer division by zero is undefined, so stop with a message. */
        if ($3 == 0) {
            printf("Error: Division by zero\n");
            exit(1);
        }
        printf("Dividing: %d / %d\n", $1, $3); /* Debugging output */
        $$ = $1 / $3;
    }

  /* Unary minus directly in front of a number or an identifier. */
  | '-' NUMBER {
        printf("Unary minus: -%d\n", $2); /* Debugging output */
        $$ = -$2;
    }
  | '-' ID {
        /* NOTE(review): yytext holds the identifier only if the parser has
           not read a lookahead token yet - confirm, or carry the name in
           the token's semantic value instead. */
        printf("Unary minus: -%s\n", yytext); /* Debugging output */
        $$ = -get_value(yytext);
    }

  /* Parenthesized sub-expression: its value is the value of the inner T. */
  | '(' T ')' {
        $$ = $2;
    }

  /* Plain numbers and identifiers. */
  | NUMBER { $$ = $1; }
  | ID {
        $$ = get_value(yytext);
        printf("Using value of ID: %d\n", $$); /* Debugging output */
    }
  ;
%%
/* Program entry point: prompts for an expression, runs the parser once and
   reports the outcome as the exit status (0 when yyparse() returns 0,
   1 otherwise). */
int main() {
    printf("Enter an expression: ");
    fflush(stdout); // the prompt has no newline, so flush it before input is read
    return yyparse() != 0; // Start parsing
}

// Error handling function: prints the message s on stdout, prefixed with
// "Error: " and followed by a newline. Always returns 0.
int yyerror(char *s) {
    int status = 0;

    printf("Error: %s\n", s);
    return status;
}
Output:

49
PRACTICAL 16

Aim: Create Yacc and Lex specification files to generate a
calculator which accepts integer and float type arguments.

Program:
Lexical Analyzer Source Code:
%{
/* Definition section */
/* NOTE(review): only integer literals are tokenized below ([0-9]+ with atoi);
   float operands are not recognized by this scanner. */
#include<stdio.h>
#include<stdlib.h> /* atoi */
#include "y.tab.h"
extern int yylval;
%}

/* Rule Section */
%%
[0-9]+ {
    /* token value is the integer the digits spell */
    yylval=atoi(yytext);
    return NUMBER;
}
[ \t]+ { /* skip spaces as well as tabs, so "1 + 2" is accepted */ }
[\n] { return 0; /* a newline ends the expression: 0 is the end-of-input token */ }
. { return yytext[0]; /* every other character is its own token */ }
%%
/* Called by the scanner at end of input; returning 1 means there is no
   further input to read. */
int yywrap()
{
    return 1;
}

Parser Source Code :


%{
/* Definition section */
#include<stdio.h>

/* Declared up front: the generated parser calls both functions before their
   definitions have been seen. yyerror is declared without a parameter list
   so that it matches its definition later in this file. */
int yylex(void);
void yyerror();

/* Set to 1 by yyerror(); main() reports the expression as valid only while
   it is still 0. */
int flag=0;
%}

%token NUMBER
%left '+' '-'
%left '*' '/' '%'
%left '(' ')'
/* Rule Section */
%%
/* Start symbol: a complete expression. Prints its value and stops parsing. */
ArithmeticExpression: E{
    /* $1 rather than $$: $$ has not been assigned in this action. */
    printf("\nResult=%d\n", $1);
    YYACCEPT;
};
/* E: an integer expression. Precedence and associativity of the binary
   operators come from the %left declarations above. */
E:E'+'E {$$=$1+$3;}
 |E'-'E {$$=$1-$3;}
 |E'*'E {$$=$1*$3;}
 |E'/'E {
    /* Integer division by zero is undefined: report it, mark the input as
       invalid through flag, and abandon the parse. */
    if ($3 == 0) {
        printf("\nError: Division by zero\n");
        flag = 1;
        YYABORT;
    }
    $$=$1/$3;
 }
 |E'%'E {
    /* The same guard applies to the remainder operator. */
    if ($3 == 0) {
        printf("\nError: Division by zero\n");
        flag = 1;
        YYABORT;
    }
    $$=$1%$3;
 }
 |'('E')' {$$=$2;}
 | NUMBER {$$=$1;}
 ;
%%
//driver code
void main()
{
printf("\nEnter Any Arithmetic Expression which can have operations Addition, Subtraction,
Multiplication, Division, Modulus and Round brackets:\n");

yyparse();
if(flag==0)
printf("\nEntered arithmetic expression is Valid\n\n");
}
/* Parser error callback: records the failure in flag and reports that the
   entered expression is invalid.
   NOTE(review): this definition takes no parameters; if the generated parser
   passes a message argument, consider declaring one - confirm against the
   parser generator in use. */
void yyerror()
{
    flag=1;
    printf("\nEntered arithmetic expression is Invalid\n\n");
}
Output:

51

You might also like