(***********************************************************************)
(*                                                                     *)
(*                           Objective Caml                            *)
(*                                                                     *)
(*              Xavier Leroy, projet Cristal, INRIA Rocquencourt       *)
(*                                                                     *)
(*  Copyright 1996 Institut National de Recherche en Informatique et   *)
(*  en Automatique.  All rights reserved.  This file is distributed    *)
(*  under the terms of the GNU Library General Public License, with    *)
(*  the special exception on linking described in file ../LICENSE.     *)
(*                                                                     *)
(***********************************************************************)

(* $Id: genlex.ml,v 1.9 2002/04/18 07:27:42 garrigue Exp $ *)

(*
   Modified by Erwan Jahier
   in order to add source info to tokens

*)

(* Position of a token: a pair of character offsets into the input stream,
   taken from [Stream.count], nominally where the token starts and where it
   ends.
   NOTE(review): the historical wording here was "line and column", but the
   lexer in this file never computes lines or columns. *)
type source_info = int * int (* start offset, end offset *)

(* Tokens produced by the lexer.  Every constructor carries the position
   pair of the token first, followed by the token payload. *)
type token =
    Kwd of source_info * string
    (* an identifier or a single character found in the keyword list *)
  | Ident of source_info * string
    (* an alphanumeric or operator-character identifier that is not a keyword *)
  | Int of source_info * int
    (* an integer literal, converted with [my_int_of_string] *)
  | Float of source_info * float
    (* a literal with a decimal or exponent part, converted with
       [float_of_string] *)
  | String of source_info * string
    (* the contents of a double-quoted literal, escapes already decoded *)
  | Char of source_info * char
    (* the contents of a single-quoted literal, escapes already decoded *)


(* The string buffering machinery *)

(* Scratch storage every token starts from.  It is allocated with
   [Bytes.create] because the rest of this section manipulates it through
   the [Bytes] API ([String.create] was the deprecated spelling of the same
   allocation and is gone from recent compilers). *)
let initial_buffer = Bytes.create 32

(* [buffer] is the byte sequence currently being filled; it is either
   [initial_buffer] or a larger copy allocated by [store].
   [bufpos] is the number of valid characters at the start of [buffer]. *)
let buffer = ref initial_buffer
let bufpos = ref 0

(* Start accumulating a new token: go back to the small initial buffer and
   forget everything stored so far. *)
let reset_buffer () =
  buffer := initial_buffer;
  bufpos := 0

(* [store c] appends [c] to the token being accumulated.  When the buffer is
   full, its size is doubled and the characters stored so far are copied. *)
let store c =
  if !bufpos >= Bytes.length !buffer then begin
    let newbuffer = Bytes.create (2 * !bufpos) in
    Bytes.blit !buffer 0 newbuffer 0 !bufpos;
    buffer := newbuffer
  end;
  Bytes.set !buffer !bufpos c;
  incr bufpos

(* [get_string ()] returns the accumulated characters as a fresh string and
   releases any enlarged buffer.  [bufpos] is reset together with [buffer]:
   leaving it untouched would let a later [store] try to blit more bytes
   than [initial_buffer] holds and fail with [Invalid_argument]. *)
let get_string () =
  let s = Bytes.sub_string !buffer 0 !bufpos in
  buffer := initial_buffer;
  bufpos := 0;
  s

(* The lexer *)

(* Conversion applied to the text of integer literals (see the [Int] case of
   the lexer).  This is only an alias for the project-local
   [Util.my_int_of_string].
   NOTE(review): its accepted syntax and failure behaviour are defined in
   [Util] and are not visible from this file. *)
let my_int_of_string = Util.my_int_of_string

(* [make_lexer keywords] builds a lexer: a function from a character stream
   to a stream of tokens.

   - Identifiers (letters, digits, underscore, quote, Latin-1 letters) and
     operator identifiers (runs of operator characters) become [Kwd] when
     they appear in [keywords], [Ident] otherwise.
   - Any other single character becomes [Kwd] when it appears in [keywords];
     otherwise [Stream.Error] is raised with the message
     "Illegal character " followed by the character.
   - Integer, float, string and character literals become [Int], [Float],
     [String] and [Char].
   - Blanks, one-line comments starting with two dashes, and nestable
     star-parenthesis comments are skipped.

   Every token carries the pair (start, stop) of [Stream.count] values read
   before its first character and after its last one.  The start offset is
   captured once in [next_token] and threaded through every helper, so
   numbers, a lone dash and a lone open parenthesis report the same kind of
   span as identifiers and literals.

   Malformed character literals and escapes raise [Stream.Error ""]; an
   unterminated string or comment raises [Stream.Failure].

   All lexers share the module-level accumulation buffer ([reset_buffer],
   [store], [get_string]). *)
let make_lexer keywords =
  (* The table is only used as a set of keyword spellings. *)
  let kwd_table = Hashtbl.create 17 in
  List.iter (fun s -> Hashtbl.replace kwd_table s ()) keywords;
  let ident_or_keyword id s e =
    if Hashtbl.mem kwd_table id then Kwd ((s, e), id) else Ident ((s, e), id)
  and keyword_or_error c s e =
    let id = String.make 1 c in
    if Hashtbl.mem kwd_table id then Kwd ((s, e), id)
    else raise (Stream.Error ("Illegal character " ^ id))
  in
  let rec next_token (strm__ : _ Stream.t) =
    (* Offset of the first character of the token about to be read. *)
    let start = Stream.count strm__ in
    match Stream.peek strm__ with
    | Some (' ' | '\010' | '\013' | '\009' | '\026' | '\012') ->
        Stream.junk strm__;
        next_token strm__
    | Some ('A'..'Z' | 'a'..'z' | '_' | '\192'..'\255' as c) ->
        Stream.junk strm__;
        reset_buffer ();
        store c;
        ident strm__ start
    | Some
        ('!' | '%' | '&' | '$' | '#' | '+' | '/' | ':' | '<' | '=' | '>' |
         '?' | '@' | '\\' | '~' | '^' | '|' | '*' as c) ->
        Stream.junk strm__;
        reset_buffer ();
        store c;
        ident2 strm__ start
    | Some ('0'..'9' as c) ->
        Stream.junk strm__;
        reset_buffer ();
        store c;
        number strm__ start
    | Some '\'' ->
        Stream.junk strm__;
        let c =
          try char strm__ with
          | Stream.Failure -> raise (Stream.Error "")
        in
        begin match Stream.peek strm__ with
        | Some '\'' ->
            Stream.junk strm__;
            Some (Char ((start, Stream.count strm__), c))
        | _ -> raise (Stream.Error "")
        end
    | Some '"' ->
        Stream.junk strm__;
        reset_buffer ();
        let str = string strm__ in
        Some (String ((start, Stream.count strm__), str))
    | Some '-' ->
        Stream.junk strm__;
        maybe_one_line_comment strm__ start
    | Some '(' ->
        Stream.junk strm__;
        maybe_comment strm__ start
    | Some c ->
        Stream.junk strm__;
        Some (keyword_or_error c start (Stream.count strm__))
    | None -> None

  (* Rest of an alphanumeric identifier whose first character is stored. *)
  and ident (strm__ : _ Stream.t) (start : int) =
    match Stream.peek strm__ with
    | Some
        ('A'..'Z' | 'a'..'z' | '\192'..'\255' | '0'..'9' | '_' | '\'' as c) ->
        Stream.junk strm__;
        store c;
        ident strm__ start
    | _ ->
        let str = get_string () in
        let stop = Stream.count strm__ in
        Some (ident_or_keyword str start stop)

  (* Rest of an operator identifier whose first character is stored. *)
  and ident2 (strm__ : _ Stream.t) (start : int) =
    match Stream.peek strm__ with
    | Some
        ('!' | '%' | '&' | '$' | '#' | '+' | '-' | '/' | ':' | '<' | '=' |
         '>' | '?' | '@' | '\\' | '~' | '^' | '|' | '*' as c) ->
        Stream.junk strm__;
        store c;
        ident2 strm__ start
    | _ ->
        let str = get_string () in
        let stop = Stream.count strm__ in
        Some (ident_or_keyword str start stop)

  (* A dash has been consumed at offset [start]: it either begins a negative
     number or an operator identifier. *)
  and neg_number (strm__ : _ Stream.t) (start : int) =
    match Stream.peek strm__ with
    | Some ('0'..'9' as c) ->
        Stream.junk strm__;
        reset_buffer ();
        store '-';
        store c;
        number strm__ start
    | _ ->
        reset_buffer ();
        store '-';
        ident2 strm__ start

  (* Integer part of a number. *)
  and number (strm__ : _ Stream.t) (start : int) =
    match Stream.peek strm__ with
    | Some ('0'..'9' as c) ->
        Stream.junk strm__;
        store c;
        number strm__ start
    | Some '.' ->
        Stream.junk strm__;
        store '.';
        decimal_part strm__ start
    | Some ('e' | 'E') ->
        Stream.junk strm__;
        store 'E';
        exponent_part strm__ start
    | _ ->
        let s = get_string () in
        Some (Int ((start, Stream.count strm__), my_int_of_string s))

  (* Digits after the decimal point. *)
  and decimal_part (strm__ : _ Stream.t) (start : int) =
    match Stream.peek strm__ with
    | Some ('0'..'9' as c) ->
        Stream.junk strm__;
        store c;
        decimal_part strm__ start
    | Some ('e' | 'E') ->
        Stream.junk strm__;
        store 'E';
        exponent_part strm__ start
    | _ ->
        let s = get_string () in
        Some (Float ((start, Stream.count strm__), float_of_string s))

  (* Optional sign right after the exponent marker. *)
  and exponent_part (strm__ : _ Stream.t) (start : int) =
    match Stream.peek strm__ with
    | Some ('+' | '-' as c) ->
        Stream.junk strm__;
        store c;
        end_exponent_part strm__ start
    | _ -> end_exponent_part strm__ start

  (* Digits of the exponent. *)
  and end_exponent_part (strm__ : _ Stream.t) (start : int) =
    match Stream.peek strm__ with
    | Some ('0'..'9' as c) ->
        Stream.junk strm__;
        store c;
        end_exponent_part strm__ start
    | _ ->
        let s = get_string () in
        Some (Float ((start, Stream.count strm__), float_of_string s))

  (* Body of a string literal, up to and including the closing quote. *)
  and string (strm__ : _ Stream.t) =
    match Stream.peek strm__ with
    | Some '"' ->
        Stream.junk strm__;
        get_string ()
    | Some '\\' ->
        Stream.junk strm__;
        let c =
          try escape strm__ with
          | Stream.Failure -> raise (Stream.Error "")
        in
        store c;
        string strm__
    | Some c ->
        Stream.junk strm__;
        store c;
        string strm__
    | None -> raise Stream.Failure

  (* The single character of a character literal. *)
  and char (strm__ : _ Stream.t) =
    match Stream.peek strm__ with
    | Some '\\' ->
        Stream.junk strm__;
        begin
          try escape strm__ with
          | Stream.Failure -> raise (Stream.Error "")
        end
    | Some c ->
        Stream.junk strm__;
        c
    | None -> raise Stream.Failure

  (* Escape sequence, the backslash being already consumed: n, r, t, three
     decimal digits, or any other character standing for itself. *)
  and escape (strm__ : _ Stream.t) =
    match Stream.peek strm__ with
    | Some 'n' -> Stream.junk strm__; '\n'
    | Some 'r' -> Stream.junk strm__; '\r'
    | Some 't' -> Stream.junk strm__; '\t'
    | Some ('0'..'9' as c1) ->
        Stream.junk strm__;
        begin match Stream.peek strm__ with
        | Some ('0'..'9' as c2) ->
            Stream.junk strm__;
            begin match Stream.peek strm__ with
            | Some ('0'..'9' as c3) ->
                Stream.junk strm__;
                Char.chr
                  ((Char.code c1 - 48) * 100 + (Char.code c2 - 48) * 10 +
                   (Char.code c3 - 48))
            | _ -> raise (Stream.Error "")
            end
        | _ -> raise (Stream.Error "")
        end
    | Some c ->
        Stream.junk strm__;
        c
    | None -> raise Stream.Failure

  (* Lustre-like one-line comments.  A dash has been consumed at offset
     [start]; a second dash opens a comment running to the end of the line. *)
  and maybe_one_line_comment (strm__ : _ Stream.t) (start : int) =
    match Stream.peek strm__ with
    | Some '-' ->
        Stream.junk strm__;
        one_line_comment strm__;
        next_token strm__
    | _ -> neg_number strm__ start

  (* Skip up to and including the next newline, or up to end of input. *)
  and one_line_comment (strm__ : _ Stream.t) =
    match Stream.peek strm__ with
    | Some '\n' -> Stream.junk strm__
    | Some _ ->
        Stream.junk strm__;
        one_line_comment strm__
    | None -> ()

  (* Multi-line comments.  An open parenthesis has been consumed at offset
     [start]; a following star opens a comment, otherwise the parenthesis is
     a token of its own. *)
  and maybe_comment (strm__ : _ Stream.t) (start : int) =
    match Stream.peek strm__ with
    | Some '*' ->
        Stream.junk strm__;
        comment strm__;
        next_token strm__
    | _ -> Some (keyword_or_error '(' start (Stream.count strm__))

  (* Skip the body of a comment up to its closing delimiter. *)
  and comment (strm__ : _ Stream.t) =
    match Stream.peek strm__ with
    | Some '(' ->
        Stream.junk strm__;
        maybe_nested_comment strm__
    | Some '*' ->
        Stream.junk strm__;
        maybe_end_comment strm__
    | Some _ ->
        Stream.junk strm__;
        comment strm__
    | None -> raise Stream.Failure

  (* An open parenthesis was seen inside a comment: a star makes it a nested
     comment, which is skipped before resuming the enclosing one. *)
  and maybe_nested_comment (strm__ : _ Stream.t) =
    match Stream.peek strm__ with
    | Some '*' ->
        Stream.junk strm__;
        comment strm__;
        comment strm__
    | Some _ ->
        Stream.junk strm__;
        comment strm__
    | None -> raise Stream.Failure

  (* A star was seen inside a comment: a close parenthesis ends the comment. *)
  and maybe_end_comment (strm__ : _ Stream.t) =
    match Stream.peek strm__ with
    | Some ')' -> Stream.junk strm__
    | Some '*' ->
        Stream.junk strm__;
        maybe_end_comment strm__
    | Some _ ->
        Stream.junk strm__;
        comment strm__
    | None -> raise Stream.Failure
  in
  fun input -> Stream.from (fun _count -> next_token input)
