(* 1. BASIC PARSERS: stream of chars -> string *) let (one_letter : char Stream.t -> string) = fun (__strm : _ Stream.t) -> match Stream.peek __strm with | Some (('a' .. 'z' as c)) -> (Stream.junk __strm; String.make 1 c) | _ -> raise Stream.Failure let (one_Letter : char Stream.t -> string) = fun (__strm : _ Stream.t) -> match Stream.peek __strm with | Some (('A' .. 'Z' as c)) -> (Stream.junk __strm; String.make 1 c) | _ -> raise Stream.Failure let (one_digit : char Stream.t -> string) = fun (__strm : _ Stream.t) -> match Stream.peek __strm with | Some (('0' .. '9' as c)) -> (Stream.junk __strm; String.make 1 c) | _ -> raise Stream.Failure let rec (some_digits : char Stream.t -> string) = fun (__strm : _ Stream.t) -> match try Some (one_digit __strm) with | Stream.Failure -> None with | Some s1 -> let s2 = (try some_digits __strm with | Stream.Failure -> raise (Stream.Error "")) in s1 ^ s2 | _ -> "" let (one_ident_symbol : char Stream.t -> string) = fun (__strm : _ Stream.t) -> try one_letter __strm with | Stream.Failure -> (try one_Letter __strm with | Stream.Failure -> (try one_digit __strm with | Stream.Failure -> (match Stream.peek __strm with | Some '_' -> (Stream.junk __strm; "_") | _ -> raise Stream.Failure))) let rec (some_ident_symbols : char Stream.t -> string) = fun (__strm : _ Stream.t) -> match try Some (one_ident_symbol __strm) with | Stream.Failure -> None with | Some s1 -> let s2 = (try some_ident_symbols __strm with | Stream.Failure -> raise (Stream.Error "")) in s1 ^ s2 | _ -> "" let (one_symbol : char Stream.t -> string) = fun (__strm : _ Stream.t) -> match Stream.peek __strm with | Some (('-' | '.' | '=' | ';' | ',' as c)) -> (Stream.junk __strm; String.make 1 c) | _ -> raise Stream.Failure let (one_ident : char Stream.t -> string) = fun (__strm : _ Stream.t) -> let s1 = one_letter __strm in let s2 = try some_ident_symbols __strm with | Stream.Failure -> raise (Stream.Error "") in s1 ^ s2 let (one_space : char Stream.t -> string) = fun (__strm : _ Stream.t) -> match Stream.peek __strm with | Some ((' ' | '\t' | '\n' as c)) -> (Stream.junk __strm; String.make 1 c) | _ -> raise Stream.Failure let (one_natural : char Stream.t -> string) = fun (__strm : _ Stream.t) -> let s1 = one_digit __strm in let s2 = try some_digits __strm with | Stream.Failure -> raise (Stream.Error "") in s1 ^ s2 let (opt_decimal : char Stream.t -> string) = fun (__strm : _ Stream.t) -> match Stream.peek __strm with | Some '.' -> (Stream.junk __strm; (try one_natural __strm with | Stream.Failure -> raise (Stream.Error ""))) | _ -> "" (* 2. THE LEXICAL ANALYZER: stream of chars -> token list *) type token = | S of string | V of string | T of string | I of int | F of float let (one_number : char Stream.t -> token) = fun (__strm : _ Stream.t) -> let n = one_natural __strm in let d = try opt_decimal __strm with | Stream.Failure -> raise (Stream.Error "") in if d = "" then I (int_of_string n) else F (float_of_string (n ^ ("." ^ d))) let _types = [ "int"; "float"; "char" ] let rec (one_token : char Stream.t -> token) = fun (__strm : _ Stream.t) -> match try Some (one_space __strm) with | Stream.Failure -> None with | Some _ -> (try one_token __strm with | Stream.Failure -> raise (Stream.Error "")) | _ -> (try one_number __strm with | Stream.Failure -> (match try Some (one_symbol __strm) with | Stream.Failure -> None with | Some s -> S s | _ -> let s = one_ident __strm in if List.mem s _types then T s else V s)) let rec (some_tokens : char Stream.t -> token list) = fun (__strm : _ Stream.t) -> match try Some (one_token __strm) with | Stream.Failure -> None with | Some t -> let ts = (try some_tokens __strm with | Stream.Failure -> raise (Stream.Error "")) in t :: ts | _ -> [] let (lexical_analyzer : char Stream.t -> token list) = fun stream -> some_tokens stream (* FUNCTIONS ON STREAMS *) let (stream_is_empty : 't Stream.t -> bool) = fun stream -> (Stream.peek stream) = None let rec (string_of_stream : char Stream.t -> string) = fun stream -> match Stream.peek stream with | None -> "" | Some c -> (String.make 1 c) ^ (Stream.junk stream; string_of_stream stream) let rec (list_of_stream : 't Stream.t -> 't list) = fun stream -> match Stream.peek stream with | None -> [] | Some t -> t :: (Stream.junk stream; list_of_stream stream) (* RUNNING THE LEXICAL ANALYZER *) type ('input, 'output) lexical_analyzer = 'input -> 'output type 'output lexical_analysis = | Lexical_Analysis of 'output | Lexical_Error of 'output * string let (run_lexer : (char Stream.t, token list) lexical_analyzer -> string -> (token list) lexical_analysis) = fun parse input -> let stream = Stream.of_string input in let result = try parse stream with | Stream.Error _ -> [] in let recognized = (stream_is_empty stream) && (result <> []) in (print_string (String.concat "" [ "parser \""; input; "\" = "; if recognized then "ok" else "syntax error"; "\n" ]); if recognized then Lexical_Analysis result else Lexical_Error (result, string_of_stream stream)) (* TEST *) let _ = run_lexer lexical_analyzer "int x=123 ; int x,y=0001 ; float z = 0.234 ;" (* 3. SYNTACTIC ANALYZER: token list -> Abstract Syntax Tree *) (* Decls -> "" | Decl . Decls Decl -> Type At_least_one_var Opt_assignment ';' At_least_one_var -> Var more_Vars more_Vars -> "" | "," At_least_one_var Opt_assignment -> "" | "=" Value Value -> Float | Int *) type a_type = | Int_type | Float_type | Char_type type variable = string type variables = variable list type a_value = | Int_value of int | Float_value of float | No_value type declaration = (a_type * variable * a_value) type declarations = declaration list let rec (some_decls : token Stream.t -> declarations) = fun (__strm : _ Stream.t) -> match try Some (at_leat_one_decl __strm) with | Stream.Failure -> None with | Some d1 -> let d2 = (try some_decls __strm with | Stream.Failure -> raise (Stream.Error "")) in d1 @ d2 | _ -> [] and (at_leat_one_decl : token Stream.t -> declarations) = fun (__strm : _ Stream.t) -> let t = one_type __strm in let vs = try at_least_one_var __strm with | Stream.Failure -> raise (Stream.Error "") in let c = try opt_assignment __strm with | Stream.Failure -> raise (Stream.Error "") in match Stream.peek __strm with | Some (S ";") -> (Stream.junk __strm; List.map (fun v -> (t, v, c)) vs) | _ -> raise (Stream.Error "") and (one_type : token Stream.t -> a_type) = fun (__strm : _ Stream.t) -> match Stream.peek __strm with | Some (T "int") -> (Stream.junk __strm; Int_type) | Some (T "float") -> (Stream.junk __strm; Float_type) | Some (T "char") -> (Stream.junk __strm; Char_type) | _ -> raise Stream.Failure and (one_var : token Stream.t -> variable) = fun (__strm : _ Stream.t) -> match Stream.peek __strm with | Some (V v) -> (Stream.junk __strm; v) | _ -> raise Stream.Failure and (at_least_one_var : token Stream.t -> variables) = fun (__strm : _ Stream.t) -> let v = one_var __strm in let vs = try more_vars __strm with | Stream.Failure -> raise (Stream.Error "") in v :: vs and (more_vars : token Stream.t -> variable list) = fun (__strm : _ Stream.t) -> match Stream.peek __strm with | Some (S ",") -> (Stream.junk __strm; (try at_least_one_var __strm with | Stream.Failure -> raise (Stream.Error ""))) | _ -> [] and (opt_assignment : token Stream.t -> a_value) = fun (__strm : _ Stream.t) -> match Stream.peek __strm with | Some (S "=") -> (Stream.junk __strm; (try one_value __strm with | Stream.Failure -> raise (Stream.Error ""))) | _ -> No_value and (one_value : token Stream.t -> a_value) = fun (__strm : _ Stream.t) -> match Stream.peek __strm with | Some (I n) -> (Stream.junk __strm; Int_value n) | Some (F f) -> (Stream.junk __strm; Float_value f) | _ -> raise Stream.Failure (* RUNNING THE SYNTACTIC ANALYZER *) type ('input, 'output) syntactic_analyzer = 'input -> 'output type 'output syntactic_analysis = | Syntactic_Analysis of 'output | Syntactic_Error of 'output * token list * string let (syntactic_analyzer : token Stream.t -> declarations) = fun stream -> some_decls stream let (run_both : ('input, 't) lexical_analyzer -> (token Stream.t, 'output) syntactic_analyzer -> string -> 'output syntactic_analysis) = fun lexical_analyzer syntactic_analyzer input -> let lexical_analysis = run_lexer lexical_analyzer input in let (token_list, input_reminder) = match lexical_analysis with | Lexical_Error (token_list, input_reminder) -> (token_list, input_reminder) | Lexical_Analysis token_list -> (token_list, "") in let token_stream = Stream.of_list token_list in let output = try syntactic_analyzer token_stream with | Stream.Error _ -> [] in let recognized = (input_reminder = "") && (stream_is_empty token_stream) in (print_string (String.concat "" [ "parser \""; input; "\" = "; if recognized then "ok" else "syntax error"; "\n" ]); if recognized then Syntactic_Analysis output else Syntactic_Error (output, list_of_stream token_stream, input_reminder)) (* TEST *) let _ = run_both lexical_analyzer syntactic_analyzer "int x=123 ; int x,y=0001 ; float z = 0.234 ;"