Parser using flex and Bison
I am using flex and bison to implement a parser. The problem I ran into is how to pass the tokens from a separate flex file to the parser. When I try to compile my parser.tab.c, it complains about "undefined reference to yylex".
Then I tried running bison with the -d option and including the generated header file (parser.tab.h) in the flex file.
I just want to know the correct steps for using flex and bison (with the relevant commands to compile and run). I am using the gcc compiler.
Thanks a lot.
/* Token scanner for the C- language (flex definitions section). */
%{
#include <stdio.h>
#include <stdlib.h>
#include "parser.tab.h"   /* token codes and YYSTYPE; produced by `bison -d parser.y` */
extern YYSTYPE yylval;
%}

/* Character classes and the basic lexemes built from them. */
digit [0-9]
letter [a-zA-Z]
NUM {digit}+
ID {letter}+
KEY_WORD else|if|int|return|void|while

/* Arithmetic, relational and assignment operators. */
PLUS_OPERATOR "+"
MINUS_OPERATOR "-"
MUL_OPERATOR "*"
DIV_OPERATOR "/"
LESS_THAN_OPERATOR "<"
LESS_THAN_OR_EQUAL_OPER "<="
GREATER_THAN_OPERATOR ">"
GREATER_THAN_OR_EQUAL_OPERATOR ">="
EQUAL_OPERATOR "="
OBJ_EQUAL_OPERATOR "=="
NOT_EQUAL_OPERATOR "!="

/*
 * Punctuation.  RIGHT_BRAC-SYMBOL is spelled with a hyphen; flex accepts
 * '-' inside a definition name and the rules section refers to it by this
 * exact spelling, so the name is left unchanged here.
 */
COMMA_SYMBOL ","
SEMI_COLON_SYMBOL ";"
LEFT_BRAC_SYMBOL "("
RIGHT_BRAC-SYMBOL ")"
LEFT_SQUARE_BRAC_SYMBOL "["
RIGHT_SQUARE_BRAC_SYMBOL "]"
LEFT_CURLY_BRAC_SYMBOL "{"
RIGHT_CURLY_BRAC_SYMBOL "}"
LEFT_COMMENT "/*"
RIGHT_COMMENT "*/"

/* One definition per reserved word so that each gets its own token. */
ELSE "else"
IF "if"
INT "int"
RETURN "return"
VOID "void"
WHILE "while"

/*
 * SYMBOL and COMMENT are not referenced by any rule visible in this file.
 * NOTE(review): COMMENT uses '.', which does not match a newline, so it
 * would not behave as a general multi-line comment pattern if it were used.
 */
SYMBOL "+"|"-"|"*"|"/"|"<"|"<="|">"|">="|"=="|"!="|"="|";"|","|"("|")"|"{"|"}"|"["|"]"|"/*"|"*/"
WHITESPACE [ \t\n]+
COMMENT "/*"(.)*({WHITESPACE})*(.)*"*/"

/*
 * Malformed lexemes: a number, identifier or keyword immediately followed
 * by further letters/digits (for example "12ab" or "x1").
 * ERROR has to reference the three patterns with braces; written without
 * braces, flex treats the names as the literal text "ERROR1", "ERROR2", ...
 */
ERROR1 {NUM}(({ID}|{KEY_WORD})|{NUM})+
ERROR2 {ID}(({NUM}|{KEY_WORD})|{ID})+
ERROR3 {KEY_WORD}(({ID}|{NUM})|{KEY_WORD})+
ERROR {ERROR1}|{ERROR2}|{ERROR3}
%%
    /*
     * Scanner rules.  Each recognised lexeme returns the token code of the
     * same name from parser.tab.h.  flex prefers the longest match and, on a
     * tie, the rule listed first, so the reserved-word rules must stay ahead
     * of the {ID} rule.
     */
{NUM}                            { return NUM; }
{ELSE}                           { return ELSE; }
{IF}                             { return IF; }
{INT}                            { return INT; }
{RETURN}                         { return RETURN; }
{VOID}                           { return VOID; }
{WHILE}                          { return WHILE; }
{ID}                             { return ID; }
{PLUS_OPERATOR}                  { return PLUS_OPERATOR; }
{MINUS_OPERATOR}                 { return MINUS_OPERATOR; }
{MUL_OPERATOR}                   { return MUL_OPERATOR; }
{DIV_OPERATOR}                   { return DIV_OPERATOR; }
{LESS_THAN_OPERATOR}             { return LESS_THAN_OPERATOR; }
{LESS_THAN_OR_EQUAL_OPER}        { return LESS_THAN_OR_EQUAL_OPER; }
{GREATER_THAN_OPERATOR}          { return GREATER_THAN_OPERATOR; }
{GREATER_THAN_OR_EQUAL_OPERATOR} { return GREATER_THAN_OR_EQUAL_OPERATOR; }
{EQUAL_OPERATOR}                 { return EQUAL_OPERATOR; }
{OBJ_EQUAL_OPERATOR}             { /* "==" must yield its own token, not the "!=" one */
                                   return OBJ_EQUAL_OPERATOR; }
{NOT_EQUAL_OPERATOR}             { return NOT_EQUAL_OPERATOR; }
{COMMA_SYMBOL}                   { return COMMA_SYMBOL; }
{SEMI_COLON_SYMBOL}              { return SEMI_COLON_SYMBOL; }
{LEFT_BRAC_SYMBOL}               { return LEFT_BRAC_SYMBOL; }
{RIGHT_BRAC-SYMBOL}              { /* the definition name is spelled with a hyphen */
                                   return RIGHT_BRAC_SYMBOL; }
{LEFT_SQUARE_BRAC_SYMBOL}        { return LEFT_SQUARE_BRAC_SYMBOL; }
{RIGHT_SQUARE_BRAC_SYMBOL}       { return RIGHT_SQUARE_BRAC_SYMBOL; }
{LEFT_CURLY_BRAC_SYMBOL}         { return LEFT_CURLY_BRAC_SYMBOL; }
{RIGHT_CURLY_BRAC_SYMBOL}        { return RIGHT_CURLY_BRAC_SYMBOL; }
{LEFT_COMMENT}                   { return LEFT_COMMENT; }
{RIGHT_COMMENT}                  { return RIGHT_COMMENT; }
{WHITESPACE}                     { /* separators carry no token */ }
{ERROR}                          { /* report the malformed lexeme instead of dropping it silently */
                                   fprintf(stderr, "lexical error: malformed token '%s'\n", yytext); }
.                                { /* anything no other rule accepts; avoids flex's default echo to stdout */
                                   fprintf(stderr, "lexical error: unexpected character '%s'\n", yytext); }
%%
/*
 * Stand-alone driver for the scanner: reads the file named by the first
 * command-line argument, or stdin when no argument is given, and runs the
 * scanner over the whole input.
 *
 * The parser file defines its own main(), so both cannot be linked into one
 * executable.  Build the scanner with -DSCANNER_NO_MAIN when linking it with
 * the parser; without that macro this driver is compiled as before.
 *
 * Returns EXIT_SUCCESS after reaching end of input, EXIT_FAILURE when the
 * input file cannot be opened.
 */
#ifndef SCANNER_NO_MAIN
int main(int argc, char **argv)
{
    if (argc > 1) {
        yyin = fopen(argv[1], "r");
        if (yyin == NULL) {
            perror(argv[1]);
            return EXIT_FAILURE;
        }
    } else {
        yyin = stdin;
    }

    /*
     * Every rule returns its token, so a single yylex() call consumes only
     * one token.  Keep calling until it returns 0, which marks end of input.
     */
    while (yylex() != 0) {
        /* tokens are discarded; this driver only exercises the scanner */
    }

    if (yyin != stdin) {
        fclose(yyin);
    }
    return EXIT_SUCCESS;
}
#endif /* SCANNER_NO_MAIN */
/*
 * End-of-input hook for the flex scanner.
 * Always returns 1, i.e. there is no further input to continue with.
 */
int yywrap(void)
{
    return 1;
}
parser:
%{
#include <stdio.h>
#include <ctype.h>
#define YYDEBUG 1

/*
 * The generated parser calls yylex() for every token and yyerror() on a
 * syntax error.  Bison does not declare either of them, so declare both here
 * to avoid implicit function declarations in parser.tab.c.
 */
int yylex(void);
int yyerror(char *s);
%}

/* Token codes shared with the scanner through parser.tab.h (bison -d). */
%token ID NUM
%token PLUS_OPERATOR MINUS_OPERATOR MUL_OPERATOR DIV_OPERATOR
%token LESS_THAN_OPERATOR LESS_THAN_OR_EQUAL_OPER
%token GREATER_THAN_OPERATOR GREATER_THAN_OR_EQUAL_OPERATOR
%token EQUAL_OPERATOR OBJ_EQUAL_OPERATOR NOT_EQUAL_OPERATOR
%token COMMA_SYMBOL SEMI_COLON_SYMBOL
%token LEFT_BRAC_SYMBOL RIGHT_BRAC_SYMBOL
%token LEFT_SQUARE_BRAC_SYMBOL RIGHT_SQUARE_BRAC_SYMBOL
%token LEFT_CURLY_BRAC_SYMBOL RIGHT_CURLY_BRAC_SYMBOL
%token LEFT_COMMENT RIGHT_COMMENT
%token ELSE IF INT RETURN VOID WHILE

/* One shift/reduce conflict is expected (presumably the if/else ambiguity in selection_stmt). */
%expect 1
%%
/*
 * Grammar for the C- language.  The printf() calls in some actions are
 * trace output emitted when the corresponding rule is reduced.
 */

program
    : declaration_list
    ;

declaration_list
    : declaration_list declaration
    | declaration                       { printf("njuwandusanduansduasdsdsdsa"); }
    ;

declaration
    : var_declaration
    | fun_declaration                   { printf("njuwandusanduansduasdsdsdsa"); }
    ;

/* Both forms of a variable declaration end with ';' (the array form previously required ','). */
var_declaration
    : type_specifier ID SEMI_COLON_SYMBOL
    | type_specifier ID LEFT_SQUARE_BRAC_SYMBOL NUM RIGHT_SQUARE_BRAC_SYMBOL SEMI_COLON_SYMBOL
                                        { printf("njuwandusanduansduasdsdsdsa"); }
    ;

type_specifier
    : INT
    | VOID                              { printf("njuwandusanduansduasdsdsdsa"); }
    ;

fun_declaration
    : type_specifier ID LEFT_BRAC_SYMBOL params RIGHT_BRAC_SYMBOL compound_stmt
    ;

params
    : param_list
    | VOID
    ;

param_list
    : param_list COMMA_SYMBOL param
    | param
    ;

param
    : type_specifier ID
    | type_specifier ID LEFT_SQUARE_BRAC_SYMBOL RIGHT_SQUARE_BRAC_SYMBOL
    ;

compound_stmt
    : LEFT_CURLY_BRAC_SYMBOL local_declarations statement_list RIGHT_CURLY_BRAC_SYMBOL
    ;

local_declarations
    : local_declarations var_declaration
    | /* empty */
    ;

statement_list
    : statement_list statement
    | /* empty */
    ;

statement
    : expression_stmt
    | compound_stmt
    | selection_stmt
    | iteration_stmt
    | return_stmt
    ;

expression_stmt
    : expression SEMI_COLON_SYMBOL
    | SEMI_COLON_SYMBOL
    ;

/* The optional ELSE branch is the source of the grammar's if/else ambiguity. */
selection_stmt
    : IF LEFT_BRAC_SYMBOL expression RIGHT_BRAC_SYMBOL statement
    | IF LEFT_BRAC_SYMBOL expression RIGHT_BRAC_SYMBOL statement ELSE statement
    ;

iteration_stmt
    : WHILE LEFT_BRAC_SYMBOL expression RIGHT_BRAC_SYMBOL statement
    ;

return_stmt
    : RETURN SEMI_COLON_SYMBOL
    | RETURN expression SEMI_COLON_SYMBOL
    ;

expression
    : var EQUAL_OPERATOR expression
    | simple_expression
    ;

var
    : ID
    | ID LEFT_SQUARE_BRAC_SYMBOL expression RIGHT_SQUARE_BRAC_SYMBOL
    ;

simple_expression
    : additive_expression relop additive_expression
    | additive_expression
    ;

relop
    : LESS_THAN_OR_EQUAL_OPER
    | LESS_THAN_OPERATOR
    | GREATER_THAN_OPERATOR
    | GREATER_THAN_OR_EQUAL_OPERATOR
    | OBJ_EQUAL_OPERATOR
    | NOT_EQUAL_OPERATOR
    ;

additive_expression
    : additive_expression addop term
    | term
    ;

addop
    : PLUS_OPERATOR
    | MINUS_OPERATOR                    { printf("njuwandusanduansduasdsdsdsa"); }
    ;

/*
 * NOTE(review): the action adds the two operand values regardless of which
 * mulop was parsed, and no code visible in this file assigns yylval, so the
 * computed value looks like a placeholder -- confirm before relying on it.
 */
term
    : term mulop factor                 { $$ = $1 + $3; }
    | factor
    ;

mulop
    : MUL_OPERATOR
    | DIV_OPERATOR
    ;

factor
    : LEFT_BRAC_SYMBOL expression RIGHT_BRAC_SYMBOL
    | var
    | call
    | NUM
    ;

call
    : ID LEFT_BRAC_SYMBOL args RIGHT_BRAC_SYMBOL
    ;

args
    : arg_list
    | /* empty */
    ;

/* Stray non-grammar text that had been pasted between the rule name and ':' is removed. */
arg_list
    : arg_list COMMA_SYMBOL expression
    | expression
    ;
%%
/*
 * Parser entry point: switches on bison's debug trace and runs the parser.
 * Returns whatever yyparse() returns.
 */
int main(void)
{
    /* yydebug is only defined by the generated parser because YYDEBUG is set to 1 in the prologue */
    extern int yydebug;

    yydebug = 1;
    return yyparse();
}
/*
 * Error callback for the parser: writes the message `s`, followed by a
 * newline, to stderr.  Always returns 0.
 */
int yyerror(char *s)
{
    const int status = 0;

    (void)fprintf(stderr, "%s\n", s);
    return status;
}
"yylex is the lexical analyzer function, it recognizes tokens from the input stream and returns them to the parser. Bison does not create this function automatically so you must write it so that yyparse can call it."
Source: documentation...
So you have to supply the yylex function yourself; here that means compiling and linking in the scanner that flex generates.
Both Flex and Bison give good full examples and perfect documentation:
- Flex
- Bison
yylex is a function that is defined once you run your flex file through flex. The "undefined reference to yylex" from GCC is telling you that the linker could not find yylex's definition. Your issue is that you are trying to produce an executable image without all the pieces.
For example, suppose you have 3 files, main.c, flex.l, parser.y. You could do this to compile them.
flex flex.l
bison -d parser.y
gcc main.c parser.tab.c lex.yy.c -o myEXE
This would produce an executable. But, suppose you have many many files, and doing this all the time would be very slow. You would want to compile piece by piece.
flex flex.l
gcc -c lex.yy.c -o lex.o
bison -d parser.y
gcc -c parser.tab.c -o parser.o
gcc -c main.c -o main.o
gcc main.o parser.o lex.o
The -c option tells gcc to compile and produce an object file (source code->compiled to assembly->assembly assembled). It does not do any linking.
At this point it becomes a question of how to use make, for which you can find several excellent tutorials on Google.
If you have only two files, such as flex.l and parser.y, then (after running flex and bison on them) compile the generated sources like this:
gcc -o myexe parser.tab.c lex.yy.c -lfl
-lfl links the flex support library for lexers. If you forget to include -lfl, you can get an error such as: undefined reference to yylex.
精彩评论