(* C front-end
   Copyright (c) 2001 David Monniaux
   See the BSD.LICENSE for licensing issues. *)

{
open C_parser
open Parsing_helper

(* When set, preprocessor line markers update the current file name and
   line number through [extract_line_info]; otherwise a marker is only
   counted as one consumed source line (see [beginning_of_line_token]). *)
let use_line_info = ref false

(* [extract_line_info lexbuf] interprets the line marker that has just been
   matched by the [line_control] pattern: a hash sign, optional blanks, a
   decimal line number and, optionally, a quoted file name followed by
   arbitrary text up to the end of the line.

   It calls [beginning_of_file] with the line number as [~line_offset] and
   with the quoted file name, or with [!current_file_name] when the marker
   carries no file name.

   The number is located by scanning instead of assuming it starts at
   offset 2, so markers with no blank, several blanks or a tab after the
   hash sign are read correctly, and a marker without a file name no longer
   escapes with [Not_found].

   This code is unsafe if the file name contains escaped characters: the
   name is taken verbatim between the first quote following the number and
   the last quote of the line. *)
let extract_line_info lexbuf =
  let line_info = Lexing.lexeme lexbuf in
  let len = String.length line_info in
  (* Same character set as the [whitespace] pattern below. *)
  let is_blank c = (c = ' ' || c = '\t' || c = '\012') in
  let is_digit c = ('0' <= c && c <= '9') in
  let rec skip_while pred i =
    if i < len && pred line_info.[i] then skip_while pred (i + 1) else i
  in
  (* Index 0 holds the hash sign. *)
  let number_start = skip_while is_blank 1 in
  let number_end = skip_while is_digit number_start in
  let line =
    int_of_string
      (String.sub line_info number_start (number_end - number_start))
  in
  let filename =
    try
      let opening_quote = String.index_from line_info number_end '"' in
      let closing_quote = String.rindex line_info '"' in
      if closing_quote > opening_quote then
        String.sub line_info
          (opening_quote + 1) (closing_quote - opening_quote - 1)
      else
        !current_file_name
    with Not_found ->
      (* No quoted file name in this marker. *)
      !current_file_name
  in
  beginning_of_file ~line_offset:line filename

(* [strip_literal_delimiters s] returns the text between the quotes of a
   character or string literal lexeme [s].  The optional wide prefix L is
   skipped together with the opening quote, so that a wide literal no
   longer keeps its opening quote in the result.  Escape sequences are
   left undecoded. *)
let strip_literal_delimiters s =
  let start = if String.length s > 0 && s.[0] = 'L' then 2 else 1 in
  String.sub s start (String.length s - start - 1)
}

(** Definitions *)

(*** Constants *)

(**** Numeric constants *)

let octal_digit = ['0'-'7']
let octal_constant = '0' octal_digit*

let hex_prefix = "0x" | "0X"
let hex_digit = ['a'-'f' 'A'-'F' '0'-'9']
let hex_constant = hex_prefix hex_digit+
let hex_digit_sequence = hex_digit+

let digit = ['0' - '9']
let digit_sequence = digit+

let unsigned_suffix = ['u' 'U']
let long_suffix = ['l' 'L']
let long_long_suffix = "ll" | "LL"
let integer_suffix =
    (unsigned_suffix long_suffix?)
  | (unsigned_suffix long_long_suffix)
  | (long_suffix unsigned_suffix?)
  | (long_long_suffix unsigned_suffix?)
let floating_suffix = ['f' 'l' 'F' 'L']

let integer_constant =
  (digit_sequence | hex_constant | octal_constant) integer_suffix?

let fractional_constant =
    (digit_sequence? '.' digit_sequence)
  | (digit_sequence '.')
let sign = ['+' '-']
let exponent_part = ['e' 'E'] sign? digit_sequence
let decimal_floating_constant =
    (fractional_constant exponent_part? floating_suffix?)
  | (digit_sequence exponent_part floating_suffix?)

let hex_fractional_constant =
    (hex_digit_sequence? '.' hex_digit_sequence)
  | (hex_digit_sequence '.')
let binary_exponent_part = ['p' 'P'] sign? digit_sequence
let hex_floating_constant =
    (hex_prefix hex_fractional_constant binary_exponent_part?
       floating_suffix?)
  | (hex_prefix hex_digit_sequence binary_exponent_part? floating_suffix?)

let floating_constant = decimal_floating_constant | hex_floating_constant

(**** Character constants *)

let quote = '"'
let double_quote = '"'
let single_quote = '\''
let backslash = '\\'

let simple_escape_sequence =
    (backslash single_quote)
  | (backslash double_quote)
  | (backslash '?')
  | (backslash backslash)
  | (backslash 'a')
  | (backslash 'b')
  | (backslash 'f')
  | (backslash 'n')
  | (backslash 'r')
  | (backslash 't')
  | (backslash 'v')
let octal_escape_sequence =
    backslash octal_digit
  | backslash octal_digit octal_digit
  | backslash octal_digit octal_digit octal_digit
let hex_escape_sequence = backslash "x" hex_digit+
let hex_quad = hex_digit hex_digit hex_digit hex_digit
let universal_character_name = backslash ['u' 'U'] hex_quad
let escape_sequence =
    simple_escape_sequence
  | octal_escape_sequence
  | hex_escape_sequence
  | universal_character_name

let c_char = [^ '\'' '\\' '\n'] | escape_sequence
let character_constant =
    single_quote c_char* single_quote
  | 'L' single_quote c_char* single_quote

(**** String constants *)

let s_char = [^ '"' '\\' '\n'] | escape_sequence
let string_literal = (quote s_char* quote) | ('L' quote s_char* quote)

(*** Identifiers *)

let nondigit = ['_' 'a'-'z' 'A'-'Z']
let identifier_nondigit =
  nondigit | universal_character_name (* no other characters *)
let identifier = identifier_nondigit (identifier_nondigit | digit)*

(*** Whitespace *)

(* Space, tab and form feed (character code 12, written in decimal). *)
let whitespace = [' ' '\t' '\012']+
let newline = "\n" | "\r" | "\r\n"

(*** Interface with preprocessor *)

(* A line marker: hash sign, line number, optional file name and trailing
   text, up to and including the end of the line. *)
let line_control =
  "#" whitespace? ['0'-'9']+ ( whitespace string_literal [^ '\n']* )? '\n'
let pragma = "#" whitespace? "pragma" [^ '\n']* '\n'

(** Rules *)

(* [normal_token] returns the next token found anywhere but at the very
   beginning of a line.  Comments and blanks are skipped; after a newline
   control is handed to [beginning_of_line_token]. *)
rule normal_token = parse
    integer_constant
      { CLEX_integer_constant((Lexing.lexeme lexbuf), (extent lexbuf)) }
  | floating_constant
      { CLEX_floating_constant((Lexing.lexeme lexbuf), (extent lexbuf)) }
  | character_constant
      { let s = Lexing.lexeme lexbuf in
        CLEX_character_constant((strip_literal_delimiters s),
                                (extent lexbuf)) }
  | string_literal
      { (* TODO: this is wrong *)
        (* NOTE(review): escape sequences are passed through undecoded and
           the wide prefix is dropped; confirm what the parser expects. *)
        let s = Lexing.lexeme lexbuf in
        CLEX_string_constant((strip_literal_delimiters s),
                             (extent lexbuf)) }

  | "["   { CLEX_lbracket(extent lexbuf) }
  | "]"   { CLEX_rbracket(extent lexbuf) }
  | "("   { CLEX_lparen(extent lexbuf) }
  | ")"   { CLEX_rparen(extent lexbuf) }
  (* This is a hack *)
  | "{"   { enter_block(); CLEX_lbrace(extent lexbuf) }
  | "}"   { exit_block(); CLEX_rbrace(extent lexbuf) }
  | "."   { CLEX_period(extent lexbuf) }
  | "->"  { CLEX_arrow(extent lexbuf) }
  | "++"  { CLEX_plus_plus(extent lexbuf) }
  | "--"  { CLEX_minus_minus(extent lexbuf) }
  | "&"   { CLEX_ampersand(extent lexbuf) }
  | "*"   { CLEX_star(extent lexbuf) }
  | "+"   { CLEX_plus(extent lexbuf) }
  | "-"   { CLEX_minus(extent lexbuf) }
  | "~"   { CLEX_tilde(extent lexbuf) }
  | "!"   { CLEX_exclamation(extent lexbuf) }
  | "/"   { CLEX_slash(extent lexbuf) }
  | "%"   { CLEX_percent(extent lexbuf) }
  | "<<"  { CLEX_less_less(extent lexbuf) }
  | ">>"  { CLEX_greater_greater(extent lexbuf) }
  | "<"   { CLEX_less(extent lexbuf) }
  | ">"   { CLEX_greater(extent lexbuf) }
  | "<="  { CLEX_less_equal(extent lexbuf) }
  | ">="  { CLEX_greater_equal(extent lexbuf) }
  | "=="  { CLEX_equal_equal(extent lexbuf) }
  | "!="  { CLEX_exclamation_equal(extent lexbuf) }
  | "^"   { CLEX_caret(extent lexbuf) }
  | "|"   { CLEX_pipe(extent lexbuf) }
  | "&&"  { CLEX_ampersand_ampersand(extent lexbuf) }
  | "||"  { CLEX_pipe_pipe(extent lexbuf) }
  | "?"   { CLEX_interrogation(extent lexbuf) }
  | ":"   { CLEX_colon(extent lexbuf) }
  | ";"   { CLEX_semicolon(extent lexbuf) }
  | "..." { CLEX_ellipsis(extent lexbuf) }
  | "="   { CLEX_equal(extent lexbuf) }
  | "*="  { CLEX_star_equal(extent lexbuf) }
  | "/="  { CLEX_slash_equal(extent lexbuf) }
  | "%="  { CLEX_percent_equal(extent lexbuf) }
  | "+="  { CLEX_plus_equal(extent lexbuf) }
  | "-="  { CLEX_minus_equal(extent lexbuf) }
  | "<<=" { CLEX_less_less_equal(extent lexbuf) }
  | ">>=" { CLEX_greater_greater_equal(extent lexbuf) }
  | "&="  { CLEX_ampersand_equal(extent lexbuf) }
  | "^="  { CLEX_caret_equal(extent lexbuf) }
  | "|="  { CLEX_pipe_equal(extent lexbuf) }
  | ","   { CLEX_comma(extent lexbuf) }
  | "#"   { CLEX_hash(extent lexbuf) }
  | "##"  { CLEX_hash_hash(extent lexbuf) }

  (* Those are in the standard, but I don't know what they're for! - D.M. *)
  (* They are the C digraphs: alternative spellings of the bracket, brace
     and hash punctuators. *)
  | "<:"   { CLEX_less_colon(extent lexbuf) }
  | ":>"   { CLEX_colon_greater(extent lexbuf) }
  | "<%"   { CLEX_less_percent(extent lexbuf) }
  | "%>"   { CLEX_percent_greater(extent lexbuf) }
  | "%:"   { CLEX_percent_colon(extent lexbuf) }
  | "%:%:" { CLEX_percent_colon_percent_colon(extent lexbuf) }

  (* Keywords are recognised by looking up the identifier text; any other
     identifier is a typedef name when [is_a_typedef] says so. *)
  | identifier
      { let s = (Lexing.lexeme lexbuf)
        and e = (extent lexbuf) in
        match s with
          "auto" -> CLEX_auto(e)
        | "break" -> CLEX_break(e)
        | "case" -> CLEX_case(e)
        | "char" -> CLEX_char(e)
        | "const" -> CLEX_const(e)
        | "continue" -> CLEX_continue(e)
        | "default" -> CLEX_default(e)
        | "do" -> CLEX_do(e)
        | "double" -> CLEX_double(e)
        | "else" -> CLEX_else(e)
        | "enum" -> CLEX_enum(e)
        | "extern" -> CLEX_extern(e)
        | "float" -> CLEX_float(e)
        | "for" -> CLEX_for(e)
        | "goto" -> CLEX_goto(e)
        | "if" -> CLEX_if(e)
        | "inline" -> CLEX_inline(e)
        | "int" -> CLEX_int(e)
        | "assert" -> CLEX_assert(e) (* WARNING: added keyword *)
        | "known_fact" -> CLEX_known_fact(e) (* WARNING: added keyword *)
        | "analysis_log" ->
            CLEX_analysis_log(e) (* WARNING: added keyword *)
        | "wait_for_clock" ->
            CLEX_wait_for_clock(e) (* WARNING: added keyword *)
        | "partition_begin" ->
            CLEX_partition_begin(e) (* WARNING: added keyword *)
        | "partition_merge" ->
            CLEX_partition_merge(e) (* WARNING: added keyword *)
        | "partition_controle" ->
            CLEX_partition_controle(e) (* WARNING: added keyword *)
        | "long" -> CLEX_long(e)
        | "register" -> CLEX_register(e)
        | "restrict" -> CLEX_restrict(e)
        | "return" -> CLEX_return(e)
        | "short" -> CLEX_short(e)
        | "signed" -> CLEX_signed(e)
        | "sizeof" -> CLEX_sizeof(e)
        | "static" -> CLEX_static(e)
        | "struct" -> CLEX_struct(e)
        | "switch" -> CLEX_switch(e)
        | "typedef" -> CLEX_typedef(e)
        | "union" -> CLEX_union(e)
        | "unsigned" -> CLEX_unsigned(e)
        | "void" -> CLEX_void(e)
        | "volatile" -> CLEX_volatile(e)
        | "while" -> CLEX_while(e)
        | "_Bool" -> CLEX__Bool(e)
        | "_Complex" -> CLEX__Complex(e)
        | "_Imaginary" -> CLEX__Imaginary(e)
        | _ ->
            if is_a_typedef s
            then CLEX_typedef_identifier(s, e)
            else CLEX_identifier(s, e) }

  | "/*"       { comment lexbuf; normal_token lexbuf }
  | whitespace { normal_token lexbuf }
  | newline    { next_line lexbuf; beginning_of_line_token lexbuf }
  | eof        { CLEX_eof(extent lexbuf) }

(* [comment] skips the remainder of a block comment whose opening
   delimiter has already been consumed.  Line ends inside the comment are
   reported through [next_line], as [normal_token] does for ordinary line
   ends, so that line numbers stay correct after a multi-line comment.
   Raises [Failure] if the input ends before the comment is closed. *)
and comment = parse
    "*/"    { () }
  | newline { next_line lexbuf; comment lexbuf }
  | eof     { failwith "unterminated comment" }
  | _       { comment lexbuf }

(* [beginning_of_line_token] is the entry used at the start of each line:
   it skips leading blanks, consumes line markers and pragmas, and falls
   back to [normal_token] for anything else. *)
and beginning_of_line_token = parse
    whitespace
      { beginning_of_line_token lexbuf }
  | line_control
      { (if !use_line_info
         then extract_line_info lexbuf
         else next_line lexbuf);
        beginning_of_line_token lexbuf }
  | pragma
      { (* #pragma is ignored *)
        next_line lexbuf;
        beginning_of_line_token lexbuf }
  | ""
      { normal_token lexbuf }

{
(* Entry point of the lexer: the input is assumed to start at the
   beginning of a line. *)
let token lexbuf = beginning_of_line_token lexbuf
}