Exercises
Filtering Comments Out
Comments in Objective CAML are hierarchical. We can thus comment away
sections of text, including those containing comments. A comment
starts with characters (*
and finishes with *)
. Here's
an example:
(* comment spread
over several
lines *)
let
succ
x
=
(* successor function *)
x
+
1
;;
(* level 1 commented text
let old_succ y = (* level 2 successor function level 2 *)
y
+
1
;;
level
1
*
)
succ
2
;;
The aim of this exercise is to create a new text without comments. You
are free to choose whatever lexical analysis tool you wish.
-
Write a lexer able to recognize Objective CAML
comments. These start with a
(*
and end with a *)
.
Your lexer should ensure
comments are balanced, that is to say the number of comment
openings equals the number of comment closings.
We are not interested in other constructions in the language which
may contain characters (*
and *)
.
(** fichier comment1.mll **)
{
let
ignore_lex
lb
=
ignore
(Lexing.lexeme
lb)
let
traite_normal
=
ignore_lex
let
traite_comment
=
ignore_lex
exception
Commentaires_mal_balances
}
rule
normal
=
parse
"(*"
{
ignore_lex
lexbuf
;
commentaire
lexbuf
;
normal
lexbuf
}
|
"*)"
{
raise
Commentaires_mal_balances
}
|
_
{
traite_normal
lexbuf
;
normal
lexbuf
}
|
eof
{
()
}
and
commentaire
=
parse
"(*"
{
ignore_lex
lexbuf
;
commentaire
lexbuf
;
commentaire
lexbuf
}
|
"*)"
{
ignore_lex
lexbuf
}
|
_
{
traite_comment
lexbuf
;
commentaire
lexbuf
}
|
eof
{
raise
Commentaires_mal_balances
}
- Write a program which takes a file, reads it,
filters comments away and writes a new file with the remaining text.
(** fichier comment2.mll **)
{
let
ignore_lex
lb
=
ignore
(Lexing.lexeme
lb)
let
sortie
=
ref
stdout
let
init_sortie
f
=
sortie
:=
f
let
traite_normal
lb
=
output_string
!
sortie
(Lexing.lexeme
lb)
let
traite_comment
=
ignore_lex
exception
Commentaires_mal_balances
}
rule
normal
=
parse
"(*"
{
ignore_lex
lexbuf
;
commentaire
lexbuf
;
normal
lexbuf
}
|
"*)"
{
raise
Commentaires_mal_balances
}
|
_
{
traite_normal
lexbuf
;
normal
lexbuf
}
|
eof
{
()
}
and
commentaire
=
parse
"(*"
{
ignore_lex
lexbuf
;
commentaire
lexbuf
;
commentaire
lexbuf
}
|
"*)"
{
ignore_lex
lexbuf
}
|
_
{
traite_comment
lexbuf
;
commentaire
lexbuf
}
|
eof
{
raise
Commentaires_mal_balances
}
{
let
decommente
src
dest
=
let
file_in
=
open_in
src
in
let
lb
=
Lexing.from_channel
file_in
in
let
file_out
=
open_out
dest
in
init_sortie
file_out
;
normal
lb
;
close_in
file_in
;
close_out
file_out
;;
let
usage
()
=
print_string
"comment2 filein fileout"
;
print_newline()
;;
let
main
()
=
if
Array.length
(Sys.argv)
<>
3
then
usage
()
else
decommente
Sys.argv.
(1
)
Sys.argv.
(2
)
;;
main
();;
}
- In Objective CAML character strings may contain any character,
even the sequences
(*
and *)
. For example, character
string "what(*ever te*)xt"
should not be considered a
comment. Modify your lexer to consider
character strings.
(** fichier comment3.mll **)
{
let
ignore_lex
lb
=
ignore
(Lexing.lexeme
lb)
let
sortie
=
ref
stdout
let
init_sortie
f
=
sortie
:=
f
let
traite_normal
lb
=
output_string
!
sortie
(Lexing.lexeme
lb)
let
traite_comment
=
ignore_lex
exception
Commentaires_mal_balances
}
rule
normal
=
parse
"(*"
{
ignore_lex
lexbuf
;
commentaire
lexbuf
;
normal
lexbuf
}
|
"*)"
{
raise
Commentaires_mal_balances
}
|
'"'
{
traite_normal
lexbuf
;
chaine
lexbuf
;
normal
lexbuf
}
|
_
{
traite_normal
lexbuf
;
normal
lexbuf
}
|
eof
{
()
}
and
commentaire
=
parse
"(*"
{
ignore_lex
lexbuf
;
commentaire
lexbuf
;
commentaire
lexbuf
}
|
"*)"
{
ignore_lex
lexbuf
}
|
_
{
traite_comment
lexbuf
;
commentaire
lexbuf
}
|
eof
{
raise
Commentaires_mal_balances
}
and
chaine
=
parse
'"'
{
traite_normal
lexbuf
;
()
}
|
"\\\""
{
traite_normal
lexbuf
;
chaine
lexbuf
}
|
_
{
traite_normal
lexbuf
;
chaine
lexbuf
}
- Use this new lexer to remove comments from an
Objective CAML program .
(** fichier comment4.mll **)
{
let
ignore_lex
lb
=
ignore
(Lexing.lexeme
lb)
let
sortie
=
ref
stdout
let
init_sortie
f
=
sortie
:=
f
let
traite_normal
lb
=
output_string
!
sortie
(Lexing.lexeme
lb)
let
traite_comment
=
ignore_lex
exception
Commentaires_mal_balances
}
rule
normal
=
parse
"(*"
{
ignore_lex
lexbuf
;
commentaire
lexbuf
;
normal
lexbuf
}
|
"*)"
{
raise
Commentaires_mal_balances
}
|
'"'
{
traite_normal
lexbuf
;
chaine
lexbuf
;
normal
lexbuf
}
|
_
{
traite_normal
lexbuf
;
normal
lexbuf
}
|
eof
{
()
}
and
commentaire
=
parse
"(*"
{
ignore_lex
lexbuf
;
commentaire
lexbuf
;
commentaire
lexbuf
}
|
"*)"
{
ignore_lex
lexbuf
}
|
_
{
traite_comment
lexbuf
;
commentaire
lexbuf
}
|
eof
{
raise
Commentaires_mal_balances
}
and
chaine
=
parse
'"'
{
traite_normal
lexbuf
;
()
}
|
"\\\""
{
traite_normal
lexbuf
;
chaine
lexbuf
}
|
_
{
traite_normal
lexbuf
;
chaine
lexbuf
}
{
let
decommente
src
dest
=
let
file_in
=
open_in
src
in
let
lb
=
Lexing.from_channel
file_in
in
let
file_out
=
open_out
dest
in
init_sortie
file_out
;
normal
lb
;
close_in
file_in
;
close_out
file_out
;;
let
usage
()
=
print_string
"comment2 filein fileout"
;
print_newline()
;;
let
main
()
=
if
Array.length
(Sys.argv)
<>
3
then
usage
()
else
decommente
Sys.argv.
(1
)
Sys.argv.
(2
)
;;
main
();;
}
Evaluator
We will use ocamlyacc to implement an expression
evaluator. The idea is to perform the evaluation of
expressions directly in the grammar rules.
We choose a (completely parenthesized) prefix arithmetic expression
language with variable arity operators. For example, expression
(ADD e1 e2 .. en) is equivalent to e1 + e2 + .. + en. Plus
and times operators are right-associative and subtraction and
division are left-associative.
- Define in file opn_parser.mly the
parsing and evaluation rules for an expression.
%{
let
rec
app_right
f
xs
=
match
xs
with
[
x]
->
x
|
x::xs
->
f
x
(app_right
f
xs)
|
_
->
failwith"missing argument"
;;
let
rec
app_left
f
xs
=
match
xs
with
[
x]
->
x
|
x1::x2::xs
->
app_left
f
((f
x1
x2)::xs)
|
_
->
failwith"missing argument"
;;
let
t
=
Hashtbl.create
3
;;
(
Hashtbl.add
t
"ADD"
(app_right
(+.
));
Hashtbl.add
t
"SUB"
(app_left
(-.
));
Hashtbl.add
t
"MUL"
(app_right
(
*.
));
Hashtbl.add
t
"DIV"
(app_left
(/.
))
)
;;
let
apply
o
vs
=
try
(Hashtbl.find
t
o)
vs
with
Not_found
->
(Printf.eprintf"Unknown operator %s\n"
o;
exit(1
))
;;
%}
%
token
Lpar
Rpar
%
token
<
float>
Num
%
token
<
string>
Atom
%
start
term
%
type
<
float>
term
%
type
<
float
list>
terms
%%
term
:
Num
{
$
1
}
|
Lpar
Atom
terms
Rpar
{
(apply
$
2
$
3
)
}
;
terms
:
term
{
[$
1
]
}
|
term
terms
{
$
1
::$
2
}
;
%%
- Define in file opn_lexer.mll the
lexical analysis of expressions.
{
open
Opn_parser
}
rule
lexer
=
parse
[
' '
'\n'
]
{
lexer
lexbuf
}
|
'('
{
Lpar
}
|
')'
{
Rpar
}
|
'-'
?[
'0'
-
'9'
]*
'.'
?[
'0'
-
'9'
]*
{
Num
(float_of_string
(Lexing.lexeme
lexbuf))
}
|
[
'A'
-
'z'
]+
{
Atom
(Lexing.lexeme
lexbuf)
}
- Write a simple main program opn which reads
a line from standard input containing an expression and prints the
result of evaluating the expression.
open
Opn_lexer
;;
open
Opn_parser
;;
Printf.printf"? "
;
flush
stdout;
let
buf
=
Lexing.from_string
(input_line
stdin)
in
Printf.printf
"= %f\n"
(Opn_parser.term
Opn_lexer.lexer
buf)
;;