(* 1. BASIC PARSERS: stream of chars -> string *) let (one_letter: char Stream.t -> string) = parser | [< '( 'a'..'z' ) as c >] -> String.make 1 c let (one_Letter: char Stream.t -> string) = parser | [< '( 'A'..'Z' ) as c >] -> String.make 1 c let (one_digit: char Stream.t -> string) = parser | [< '( '0'..'9') as c >] -> String.make 1 c let rec (some_digits: char Stream.t -> string) = parser | [< s1 = one_digit ; s2 = some_digits >] -> s1^s2 | [<>] -> "" let (one_ident_symbol: char Stream.t -> string) = parser | [< s = one_letter >] -> s | [< s = one_Letter >] -> s | [< s = one_digit >] -> s | [< ''_' >] -> "_" let rec (some_ident_symbols: char Stream.t -> string) = parser | [< s1 = one_ident_symbol ; s2 = some_ident_symbols >] -> s1 ^ s2 | [<>] -> "" let (one_symbol: char Stream.t -> string) = parser | [< '( '-'|'.'|'='|';'|',' ) as c >] -> String.make 1 c let (one_ident: char Stream.t -> string) = parser | [< s1 = one_letter ; s2 = some_ident_symbols >] -> s1 ^ s2 let (one_space: char Stream.t -> string) = parser | [< '( ' ' | '\t' | '\n' as c) >] -> String.make 1 c let (one_natural: char Stream.t -> string) = parser | [< s1 = one_digit ; s2 = some_digits >] ->s1 ^ s2 let (opt_decimal: char Stream.t -> string) = parser | [< ''.' ; d = one_natural >] -> d | [<>] -> "" (* 2. THE LEXICAL ANALYZER: stream of chars -> token list *) type token = | S of string | V of string | T of string | I of int | F of float let rec (one_number: char Stream.t -> token) = parser | [< '('-') ; t = one_number >] -> (match t with | I i -> I (-i) | F f -> F (-f) ) | [< n = one_natural ; d = opt_decimal >] -> if d = "" then I(int_of_string n) else F(float_of_string (n ^ "." ^ d)) let _types = [ "int" ; "float" ; "char" ] let rec (one_token: char Stream.t -> token) = parser | [< _ = one_space ; t = one_token >] -> t | [< t = one_number >] -> t | [< s = one_symbol >] -> S s | [< s = one_ident >] -> if List.mem s _types then T s else V s let rec (some_tokens: char Stream.t -> token list) = parser | [< t = one_token ; ts = some_tokens >] -> t::ts | [<>] -> [] let (lexical_analyzer: char Stream.t -> token list) = fun stream -> some_tokens stream (* FUNCTIONS ON STREAMS *) let (stream_is_empty: 't Stream.t -> bool) = fun stream -> (Stream.peek stream) = None let rec (string_of_stream: char Stream.t -> string) = fun stream -> match Stream.peek stream with | None -> "" | Some c -> (String.make 1 c) ^ (begin Stream.junk stream ; string_of_stream stream end) let rec (list_of_stream: 't Stream.t -> 't list) = fun stream -> match Stream.peek stream with | None -> [] | Some t -> t :: (begin Stream.junk stream ; list_of_stream stream end) (* RUNNING THE LEXICAL ANALYZER *) type ('input,'output) lexical_analyzer = 'input -> 'output type 'output lexical_analysis = | Lexical_Analysis of 'output | Lexical_Error of 'output * string let (run_lexer: (char Stream.t,token list) lexical_analyzer -> string -> (token list) lexical_analysis) = fun parse input -> let stream = Stream.of_string input in let result = try parse stream with Stream.Error _ -> [] in let recognized = stream_is_empty stream && result <> [] in begin print_string (String.concat "" [ "parser \"" ; input ; "\" = " ; if recognized then "ok" else "syntax error" ; "\n" ]) ; if recognized then Lexical_Analysis result else Lexical_Error (result , string_of_stream stream) end ;; (* TEST *) run_lexer lexical_analyzer "int x=123 ; int x,y=0001 ; float z = 0.234 ;" ;; (* 3. SYNTACTIC ANALYZER: token list -> Abstract Syntax Tree *) (* Decls -> "" | Decl . Decls Decl -> Type At_least_one_var Opt_assignment ';' At_least_one_var -> Var more_Vars more_Vars -> "" | "," At_least_one_var Opt_assignment -> "" | "=" Value Value -> Float | Int *) type a_type = Int_type | Float_type | Char_type type variable = string type variables = variable list type a_value = Int_value of int | Float_value of float | No_value type declaration = a_type * variable * a_value type declarations = declaration list let rec (some_decls: token Stream.t -> declarations) = parser | [< d1 = at_leat_one_decl ; d2 = some_decls>] -> d1 @ d2 | [<>] -> [] and (at_leat_one_decl: token Stream.t -> declarations) = parser [< t = one_type ; vs = at_least_one_var ; c = opt_assignment ; '(S ";") >] -> List.map (fun v -> (t,v,c)) vs and (one_type: token Stream.t -> a_type) = parser | [< '(T "int") >] -> Int_type | [< '(T "float") >] -> Float_type | [< '(T "char") >] -> Char_type and (one_var: token Stream.t -> variable) = parser | [< '(V v) >] -> v and (at_least_one_var: token Stream.t -> variables) = parser | [< v = one_var ; vs = more_vars >] -> v::vs and (more_vars: token Stream.t -> variable list) = parser | [< '(S ",") ; vs = at_least_one_var >] -> vs | [<>] -> [] and (opt_assignment: token Stream.t -> a_value) = parser | [< '(S "=") ; v = one_value >] -> v | [<>] -> No_value and (one_value: token Stream.t -> a_value) = parser | [< '(I n) >] -> Int_value n | [< '(F f) >] -> Float_value f (* RUNNING THE SYNTACTIC ANALYZER *) type ('input,'output) syntactic_analyzer = 'input -> 'output type 'output syntactic_analysis = | Syntactic_Analysis of 'output | Syntactic_Error of 'output * token list * string let (syntactic_analyzer: token Stream.t -> declarations) = fun stream -> some_decls stream let (run_both: ('input,'t) lexical_analyzer -> (token Stream.t,'output) syntactic_analyzer -> string -> 'output syntactic_analysis) = fun lexical_analyzer syntactic_analyzer input -> let lexical_analysis = run_lexer lexical_analyzer input in let (token_list,input_reminder) = match lexical_analysis with | Lexical_Error (token_list, input_reminder) -> (token_list, input_reminder) | Lexical_Analysis token_list -> (token_list, "") in let token_stream = Stream.of_list token_list in let output = try syntactic_analyzer token_stream with Stream.Error _ -> [] in let recognized = input_reminder = "" && stream_is_empty token_stream in begin print_string (String.concat "" [ "parser \"" ; input ; "\" = " ; if recognized then "ok" else "syntax error" ; "\n" ]) ; if recognized then Syntactic_Analysis output else Syntactic_Error (output , list_of_stream token_stream , input_reminder) end ;; (* TEST *) run_both lexical_analyzer syntactic_analyzer "int x=123 ; int x,y=0001 ; float z = 0.234 ;" ;;