Decaf
Decaf
Lexical Components
The following are keywords. They are all reserved, which means they cannot be used as
identifiers or redefined.
void int double bool string class interface null this extends implements
for while if else return break New NewArray Print ReadInteger
ReadLine
An identifier is a sequence of letters, digits, and underscores, starting with a letter. Decaf
is case-sensitive.
Whitespace (i.e. spaces, tabs, and newlines) serves to separate tokens, but is otherwise
ignored.
A boolean constant is either true or false. Like keywords, these words are reserved.
An integer constant can be specified either in decimal (base 10) or in hexadecimal (base
16). A decimal integer is a sequence of decimal digits (0-9). A hexadecimal integer must
begin with 0X or 0x, followed by one or more hexadecimal digits (0-9, a-f, A-F).
A double constant is a sequence of digits, a period, followed by any sequence of digits,
maybe none.
A string constant is a sequence of characters enclosed in double quotes. Strings can
contain any character except a newline or double quote.
Operators and punctuation characters used by the language include:
+ - * / % < <= > >= = == != && || ! ; , . [ ] ( ) { }
A single-line comment is started by // and extends to the end of the line. Multi-line
comments start with /* and end with the first subsequent */.
Grammar
All Decaf programs must conform to the following grammar.
Notations:
The terminal symbols used in this description of the Decaf grammar are:
Category Symbols
Identifiers identifier
Literals intLiteral charLiteral booleanLiteral
Keywords if while else . . .
Primitive Types void int double bool string
Punctuation (){}[];,.
Operators +-*/=...
Program ➝ Decl+
VariableDecl ➝ Variable ;
Formals ➝ Variable+, | ε
BreakStmt ➝ break ;
Expr ➝ Lvalue = Expr | Constant | Lvalue | this | Call | ( Expr ) | Expr + Expr | Expr -
Expr | Expr * Expr | Expr / Expr | Expr % Expr | - Expr | Expr < Expr | Expr <= Expr |
Expr > Expr | Expr >= Expr | Expr == Expr | Expr != Expr | Expr && Expr | Expr || Expr
| ! Expr | ReadInteger ( ) | ReadLine ( ) | New ( ident ) | NewArray ( Expr , Type )
Actuals ➝ Expr+, | ε
Implementation
The parser has been implemented using Bison while the lexical analysis is done using Flex. C
Language has been used for the implementation of the symbol table.
Type of Parser
Bottom-up (LALR(1)) parser generated by Bison; Flex generates the scanner that supplies its tokens.
Methodology
Using Flex and Bison, we built the lexical and syntax phases of a compiler for the language
Decaf. The lexical analyzer (scanning phase) scans each lexeme and classifies it according to
the type of token it produces. The syntax analyzer checks whether the tokens form a valid
program under the grammar required by the language definition. A symbol table is maintained to
keep track of the variables defined in a Decaf program. If an error occurs in either of these two
stages, a message is displayed indicating the row and column at which the error occurred.
Code
%{
/*
 * Flex scanner for Decaf.
 *
 * Every lexeme is echoed on its own line (trace output) and its token kind
 * is returned to the Bison parser. Whitespace and comments are consumed
 * without producing a token.
 */
#include <stdio.h>
#include"Decaf.tab.h"

/* Kind of the most recent token returned to the parser (0 before the first). */
static int prev_token = 0;

/* Echo the lexeme, remember the token kind, and return it to the parser. */
#define EMIT(tok) do { printf("%s\n",yytext); prev_token = (tok); return (tok); } while (0)

/*
 * Returns 1 when the previous token can end an operand. A '+' or '-' that
 * follows such a token is a binary operator, not the sign of a literal;
 * without this check "a-1" and "1+2" would be scanned as two operands.
 */
static int prev_ends_operand(void)
{
    switch (prev_token) {
    case ID: case THIS: case CB: case SQBC:
    case DECCONST: case HEXCONST: case FLOAT:
    case BOOLCONST: case STRCONST: case NULLCONST:
        return 1;
    default:
        return 0;
    }
}
%}
%option yylineno
%x C_COMMENT
%%
"/*" { BEGIN(C_COMMENT); }
<C_COMMENT>"*/" { BEGIN(INITIAL); }
<C_COMMENT>\n { }
<C_COMMENT>. {}
"//".* { /* single-line comment: discard */ }
\n {printf("%s\n",yytext);}
(" "|\t) {printf("%s\n",yytext);}
(";") { EMIT(END); }
(",") { EMIT(COMMA); }
"." { EMIT(FS); }
"[" { EMIT(SQBO); }
"]" { EMIT(SQBC); }
("(") { EMIT(OB); }
(")") { EMIT(CB); }
("{") { EMIT(OCB); }
("}") { EMIT(CCB); }
("0x"|"0X")[0-9A-Fa-f]+ { EMIT(HEXCONST); }
[+-][0-9]+"."[0-9]*(E[+-]?[0-9]+)? {
    if (prev_ends_operand()) {
        /* Binary operator: keep only the sign and rescan the digits. */
        int op = (yytext[0] == '-') ? MINUS : PLUS;
        yyless(1);
        EMIT(op);
    }
    EMIT(FLOAT);
}
[0-9]+"."[0-9]*(E[+-]?[0-9]+)? { EMIT(FLOAT); }
[+-][0-9]+ {
    if (prev_ends_operand()) {
        /* Binary operator: keep only the sign and rescan the digits. */
        int op = (yytext[0] == '-') ? MINUS : PLUS;
        yyless(1);
        EMIT(op);
    }
    EMIT(DECCONST);
}
[0-9]+ { EMIT(DECCONST); }
"null" { EMIT(NULLCONST); }
"true"|"false" { EMIT(BOOLCONST); }
"void" { EMIT(VOID); }
"class" { EMIT(CLASS); }
"extends" { EMIT(EXTENDS); }
"implements" { EMIT(IMPLEMENTS); }
"interface" { EMIT(INTERFACE); }
"int"|"double"|"bool"|"string" { EMIT(DT); }
"if" { EMIT(IF); }
"else" { EMIT(ELSE); }
"for" { EMIT(FOR); }
"while" { EMIT(WHILE); }
"return" { EMIT(RETURN); }
"break" { EMIT(BREAK); }
"Print" { EMIT(PRINT); }
"this" { EMIT(THIS); }
"ReadInteger"|"ReadLine" { EMIT(READ); }
"New" { EMIT(NEW); }
"NewArray" { EMIT(NEWARR); }
[A-Za-z_][0-9A-Za-z_]* { EMIT(ID); }
("-") { EMIT(MINUS); }
("+") { EMIT(PLUS); }
("*") { EMIT(MULT); }
("/") { EMIT(DIVIDE); }
("%") { EMIT(MOD); }
("!") { EMIT(NOT); }
("&&") { EMIT(AND); }
("||") { EMIT(OR); }
("<") { EMIT(LT); }
(">") { EMIT(GT); }
("!=") { EMIT(NE); }
("==") { EMIT(EQQ); }
("<=") { EMIT(LTE); }
(">=") { EMIT(GTE); }
("=") { EMIT(EQ); }
(\"(\\.|[^"\\\n])*\") { EMIT(STRCONST); }
. { fprintf(stderr,"Unrecognized character '%s' at line %d\n",yytext,yylineno); }
%%
/* Tells the scanner there is no further input once the current file ends. */
int yywrap(void)
{
return 1;
}
Parser (Bison)
/* dec.y file
 *
 * Bison grammar for Decaf. The grammar carries no semantic actions: it only
 * accepts or rejects the token stream produced by the Flex scanner.
 */
%{
#include<stdio.h>
#include<stdlib.h>
#include "scoper.h"
int yylex(void);
int yyerror(const char *msg);
extern FILE* yyin;
extern int yylineno;
%}
%token COMM TS NL HEXCONST FLOAT DECCONST BOOLCONST KEY ID
STRCONST END DT COMMA FS NULLCONST SQBO SQBC OB CB VOID CLASS
OCB CCB EXTENDS IMPLEMENTS INTERFACE FOR WHILE IF ELSE RETURN
BREAK EQ THIS MINUS NOT READ NEW NEWARR PRINT SP PLUS MULT
DIVIDE MOD AND OR NE EQQ LT GT LTE GTE U_MINUS
%locations
/* Precedence and associativity, lowest first, to resolve shift-reduce conflicts. */
/* Assignment is conventionally right-associative. */
%right EQ
%left OR
%left AND
%nonassoc EQQ NE
%nonassoc LT GT LTE GTE
%left PLUS MINUS
%left MULT DIVIDE MOD
%nonassoc T_UnaryMinus NOT
%nonassoc FS SQBO
/* Dangling else: the empty optElse ranks below ELSE, so ELSE is shifted. */
%nonassoc T_Lower_Than_Else
%nonassoc ELSE
%%
start : declList
;
declList : declList decl
| decl
;
decl : classDecl
| fnDecl
| varDecl
| intDecl
;
varDecl : var END
;
var : type ID
;
type : DT
| ID
| type SQBO SQBC
;
intDecl : INTERFACE ID OCB intfList CCB
;
intfList : intfList fnHeader END
|
;
classDecl : CLASS ID optExt optImpl OCB fieldList CCB
;
optExt : EXTENDS ID
|
;
optImpl : IMPLEMENTS impList
|
;
impList : impList COMMA ID
| ID
;
fieldList : fieldList field
|
;
field : varDecl
| fnDecl
;
fnHeader : type ID OB formals CB
| VOID ID OB formals CB
;
formals : formalList
|
;
formalList : formalList COMMA var
| var
;
fnDecl : fnHeader stmtBlock
;
stmtBlock : OCB varDecls stmtList CCB
;
varDecls : varDecls varDecl
|
;
stmtList : stmt stmtList
|
;
stmt : optExpr END
| stmtBlock
| IF OB expr CB stmt optElse
| WHILE OB expr CB stmt
| FOR OB optExpr END expr END optExpr CB stmt
| RETURN expr END
| RETURN END
| PRINT OB exprList CB END
| BREAK END
;
lvalue : ID
| expr FS ID
| expr SQBO expr SQBC
;
call : ID OB actuals CB
| expr FS ID OB actuals CB
;
optExpr : expr
|
;
expr : lvalue
| call
| constant
| lvalue EQ expr
| expr PLUS expr
| expr MINUS expr
| expr DIVIDE expr
| expr MULT expr
| expr MOD expr
| expr EQQ expr
| expr NE expr
| expr LT expr
| expr GT expr
| expr LTE expr
| expr GTE expr
| expr AND expr
| expr OR expr
| OB expr CB
/* The scanner returns MINUS for '-'; a character token '-' would never match. */
| MINUS expr %prec T_UnaryMinus
| NOT expr
| READ OB CB
| NEW OB ID CB
| NEWARR OB expr COMMA type CB
| THIS
;
constant : DECCONST
| FLOAT
| BOOLCONST
| STRCONST
| NULLCONST
| HEXCONST
;
actuals : exprList
|
;
exprList : exprList COMMA expr
| expr
;
optElse : ELSE stmt
| %prec T_Lower_Than_Else
;
%%
/*
 * Called by the generated parser on a syntax error.
 * Prints the current scanner line number followed by the parser's message.
 * Always returns 1.
 */
int yyerror(const char *msg)
{
printf("Invalid expression at line number: %d %s\n",yylineno,msg);
return 1;
}
/*
 * Driver: opens ex.txt as the parser input, builds and prints the symbol
 * table (generateSymbolTable comes from scoper.h), then parses ex.txt once.
 * Returns EXIT_SUCCESS when the input is accepted and EXIT_FAILURE when
 * ex.txt cannot be opened or the parse fails.
 */
int main(void)
{
printf("Enter expression: ");
yyin=fopen("ex.txt","r");
if(yyin==NULL){
perror("ex.txt");
return EXIT_FAILURE;
}
generateSymbolTable();
/* yyparse() consumes the whole file, so one call is enough. */
if(yyparse()!=0){
printf("Error\n");
fclose(yyin);
return EXIT_FAILURE;
}
fclose(yyin);
printf("Success \n");
return EXIT_SUCCESS;
}
//scoper.h
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
#include<math.h>
/*
Symbol table fields:
1. Token ID
2. Variable Name
3. Data Type
4. Scope
5. Scope ID
6. Arguments
7. Argument count
8. Return type
*/
char content[100];
int lineno;
}line;
int id;
char name[100];
char type[100];
int size;
int entrypoint;
}var;
int id;
char return_type[100];
char func_name[100];
var args[100];
int argc;
int entrypoint;
int exitpoint;
}function;
//Visible fields
int id;
char name[100];
char type[100];
int size;
char scope;
int scopeID;
int argc;
char ret_type[100];
int lifetime;
//Hidden fields
int entrypoint;
int isFunction;
int exitpoint;
}token;
char keyword[][1000] =
{"void","int","double","bool","string","class","interface","null","this","extends","implements","for","whi
le","if","else","return","break","New","NewArray","Print","ReadInteger","ReadLine"};
for(i=0;i<size;i++){
if(strcmp(s,keyword[i])==0)
return 1;
return 0;
for(i=0;i<size;i++){
if(strcmp(s,datatype[i])==0)
return 1;
return 0;
int l = strlen(s);
int i;
for(i=1;i<l;i++)
flag &= (s[0]>='a' && s[0]<='z') || (s[0]>='A' && s[0]<='Z') || (s[0]=='_') || (s[i]>='0' &&
s[i]<='9');
return flag;
/*
 * findFirstLexeme - copy the leading lexeme of line `s` into `lex`.
 *
 * s:   NUL-terminated input line.
 * lex: output buffer supplied by the caller; no bound is checked here.
 *
 * NOTE(review): in this copy of the file only a single character copy is
 * visible; the loop that presumably bounds the copy is missing — confirm
 * against the original source.
 */
void findFirstLexeme(char *s,char *lex){ // Finding the first word of the line.
int pos=0;
lex[pos] = s[pos];
pos++;
// NOTE(review): `==` compares and discards the result, so `lex` is not
// NUL-terminated here; presumably `lex[pos] = '\0';` was intended.
lex[pos]=='\0';
/*
 * getFirstWord - copy the first whitespace-delimited word of `pcontent`
 * into `word`.
 *
 * Once copying has started (state == 1, the else branch below), characters
 * are appended until isWhitespace() reports a separator or the input ends;
 * `word` is then NUL-terminated. No bound on `word` is checked here.
 *
 * NOTE(review): the branch that handles leading whitespace looks incomplete
 * in this copy of the file (the condition guarding `break;` / `state = 1;`
 * is not visible) — confirm against the original source.
 */
void getFirstWord(char *pcontent,char *word){ // Find the first word in a string when leading
whitespace characters are possible
int state=0,l=strlen(pcontent),i=0,pos=0;
while(i<l){
break;
state = 1;
word[pos] = pcontent[i];
pos++;
else{
// Inside the word: keep copying until the first whitespace character.
if(!isWhitespace(pcontent[i]))
word[pos] = pcontent[i];
else
break;
pos++;
}
i++;
word[pos]='\0';
return;
int l = strlen(pcontent),pos=0,i=0;
while(!isWhitespace(pcontent[i]))
i++;
i++;
while(pcontent[i]!='('){
name[pos] = pcontent[i];
pos++;
i++;
name[pos]='\0';
return;
int start=0,brpos=0,end=0,l=strlen(tmp.content);
// printf("%s %d\n",tmp.content,l);
brpos++;
start = brpos;
brpos++;
end = brpos;
if(end==l || start==l)
return 0;
start++;end--;
return 1;
int pos=0;
char pcontent[100];
while(start<=end){
pcontent[pos] = tmp.content[start];
pos++;
start++;
pcontent[pos]='\0';
char word[100];
getFirstWord(pcontent,word);
char tmp[100];
getFirstWord(inp.content,tmp);
strcpy(f->return_type,tmp);
f->entrypoint = inp.lineno;
getFuncName(inp.content,tmp);
strcpy(f->func_name,tmp);
return;
int balance=1;
start++;
while(balance>0){
if(inp[start].content[0]=='{')
balance++;
else if(inp[start].content[0]=='}')
balance--;
start++;
return start-1;
int start=0;
while(decl[start]!='(')
start++;
start++;
int end=start;
while(decl[end]!=')')
end++;
end--;
if(start>end){
arglist[0]='\0';
return;
}
int pos=0;
while(start<=end){
arglist[pos] = decl[start];
pos++;
start++;
arglist[pos] = '\0';
return;
/*
 * getArg - extract the variable name from a single "<type> <name>" declaration.
 *
 * decl:    NUL-terminated declaration text, e.g. "int a". Leading blanks are
 *          tolerated, because splitting "int a, int b" on ',' leaves the
 *          second piece as " int b".
 * typelen: length of the type word at the start of the declaration.
 * arglist: output buffer; receives the text after the type and its
 *          separator, NUL-terminated (empty when nothing follows the type).
 *          The caller must provide at least strlen(decl) + 1 bytes.
 */
void getArg(char *decl,int typelen,char *arglist){
    int len = (int)strlen(decl);
    int start = 0;
    int pos = 0;

    /* Skip blanks in front of the type so `typelen` is measured from the type itself. */
    while (start < len && (decl[start] == ' ' || decl[start] == '\t'))
        start++;

    /* Step over the type word and the one separator character that follows it. */
    if (typelen > 0)
        start += typelen;
    start++;

    /* Skip any additional blanks between the type and the name. */
    while (start < len && (decl[start] == ' ' || decl[start] == '\t'))
        start++;

    while (start < len) {
        arglist[pos] = decl[start];
        start++;
        pos++;
    }
    arglist[pos] = '\0';
}
/*
 * generateArgs - extract the text that follows the type word of a
 * declaration line, dropping the line's final character.
 *
 * decl:    NUL-terminated declaration line, e.g. "int a,b;".
 * typelen: length of the leading type word; copying starts one character
 *          past it (the separator is skipped).
 * arglist: output buffer; receives the copied text without its last
 *          character (presumably the trailing ';' — confirm against callers),
 *          NUL-terminated. It is left empty when nothing follows the type.
 *          The caller must provide at least strlen(decl) + 1 bytes.
 *
 * This is the third routine in this file that extracts arguments; consider
 * folding them into one helper.
 */
void generateArgs(char *decl,int typelen,char *arglist){
    int len = (int)strlen(decl);
    int start = typelen + 1;
    int pos = 0;

    if (start < 0)
        start = 0;

    while (start < len) {
        arglist[pos] = decl[start];
        start++;
        pos++;
    }

    /* Drop the final copied character; with nothing copied, never write before the buffer. */
    if (pos > 0)
        pos--;
    arglist[pos] = '\0';
}
strcpy(tmp->name,name);
strcpy(tmp->type,type);
tmp->id = id;
tmp->entrypoint = lineno;
return;
int pos=0;
char ret[100];
ret[pos] = tmp[pos];
pos++;
ret[pos] = '\0';
strcpy(tmp,ret);
return;
if(strcmp(s,"char")==0) return 1;
if(!tmp->isFunction)
printf("%-12d%-12s%-12s%-12d%-12c%-12d%-12s%-12s%-12d%-12s\n",tmp->id,tmp-
>name,tmp->type,tmp->size,tmp->scope,tmp->scopeID,"NA","NA",tmp->lifetime,"NA");
else{
printf("%-12d%-12s%-12s%-12s%-12c%-12d%-12s%-12d%-12d",tmp->id,tmp-
>name,tmp->type,"NA",tmp->scope,tmp->scopeID,tmp->ret_type,tmp->argc,tmp->lifetime);
int i;
if(tmp->argc == 0){
printf("%-12s\n","None");
return;
for(i=0;i<(tmp->argc - 1);i++)
printf("%s,",tmp->args[i]->name);
printf("%s\n",tmp->args[tmp->argc - 1]->name);
return;
if(a->entrypoint == b->entrypoint)
return a->isFunction < b->isFunction;
printf("%-12s%-12s%-12s%-12s%-12s%-12s%-12s%-12s%-12s%-12s\n\n","Token
ID","Name","Type","Size","Scope","ScopeID","Ret Type","Argc","Lifetime","Arguments");
int i;
for(i=0;i<tcount;i++)
printToken(&(tlist[i]));
// Entry point: reads the program text from stdin, builds the symbol table, and prints it.
void generateSymbolTable(){
//Definitions
line inp[100];
char c;
int lines=0;
int i,j;
line loi[100];
function flist[100];
int loicount=0;
int fcount=0;
int linescope[100];
memset(linescope,-1,sizeof(linescope));
var vlist[100];
int vcount=0;
token tlist[100];
int tcount=0;
// Take input
while(scanf("%[^\n]",inp[lines].content)!=EOF){
// printf("%s\n",inp[lines]);
inp[lines].lineno = lines;
lines++;
c = getc(stdin);
for(i=0;i<lines;i++){
char tmp[100];
for(j=0;j<strlen(inp[i].content);j++){
if(!isWhitespace(inp[i].content[j]) || state){
state = 1;
tmp[pos++] = inp[i].content[j];
tmp[pos] = '\0';
strcpy(inp[i].content,tmp);
while(i<lines){
char str[100];
memset(str,0,sizeof(str));
findFirstLexeme(inp[i].content,str);
// printf("%s %s\n",str,inp[i].content);
strcpy(loi[loicount].content,inp[i].content);
loi[loicount].lineno = inp[i].lineno;
loicount++;
i++;
printf("Generating Functions...\n");
i=0;
while(i<loicount){
if(isFunc(loi[i])){
parse(loi[i],&flist[fcount]);
flist[fcount].id = fcount;
flist[fcount].exitpoint = getExitpoint(inp,loi[i].lineno+1);
flist[fcount].argc = 0;
for(j=flist[fcount].entrypoint;j<=flist[fcount].exitpoint;j++)
linescope[j]=flist[fcount].id;
fcount++;
}
i++;
printf("\n");
printf("Identifying Variables...\n");
i=0;
while(i<loicount){
char arglist[100];
if(isFunc(loi[i])){
getFargs(loi[i].content,arglist);
char *v,type[100],name[100];
v = strtok(arglist,",");
while(v != NULL){
char type[100];
getFirstWord(v,type);
getArg(v,strlen(type),name);
printf("%s %s\n",type,name);
create(&(vlist[vcount]),name,type,vcount,loi[i].lineno);
vcount++;
v = strtok(NULL,",");
else{
// printf("Variables : %s\n",loi[i].content);
char type[100];
getFirstWord(loi[i].content,type);
//Parsing the variables from the arglist.
generateArgs(loi[i].content,strlen(type),arglist);
char *v;
v = strtok(arglist,",");
while(v != NULL){
strip(v);
create(&(vlist[vcount]),v,type,vcount,loi[i].lineno);
vcount++;
v = strtok(NULL,",");
i++;
for(i=0;i<vcount;i++){
strcpy(tlist[tcount].name,vlist[i].name);
strcpy(tlist[tcount].type,vlist[i].type);
tlist[tcount].size = getSize(tlist[tcount].type);
tlist[tcount].scope = linescope[vlist[i].entrypoint]>=0?'L':'G';
tlist[tcount].entrypoint = vlist[i].entrypoint;
tlist[tcount].isFunction = 0;
tlist[tcount].scopeID = -1;
tlist[tcount].lifetime = -1;
tcount++;
}
// Tokenize the functions now.
for(i=0;i<fcount;i++){
strcpy(tlist[tcount].name,flist[i].func_name);
strcpy(tlist[tcount].ret_type,flist[i].return_type);
strcpy(tlist[tcount].type,"FUNC");
tlist[tcount].size = -1;
if(i==0 || (strcmp(tlist[tcount].name,"main")==0))
tlist[tcount].scope = 'G';
else
tlist[tcount].scope = 'L';
tlist[tcount].entrypoint = flist[i].entrypoint;
tlist[tcount].exitpoint = flist[i].exitpoint;
tlist[tcount].isFunction = 1;
tlist[tcount].scopeID = linescope[tlist[tcount].entrypoint];
tcount++;
qsort(tlist,tcount,sizeof(token),cmp);
for(i=0;i<tcount;i++){
tlist[i].id = i+1;
if(!tlist[i].isFunction)
continue;
tlist[i].argc = 0;
for(j=0;j<tcount;j++){
tlist[i].args[tlist[i].argc] = &(tlist[j]);
tlist[i].argc++;
}
tlist[j].scopeID = tlist[i].scopeID;
printSymbolTable(tlist,tcount);