

{

  (*

    Basic C lexer for use within the Cquot camlp4 quotation expander.
    note: this is probably not an entirely correct lexer, but it is close.

    Author: Graydon Hoare <graydon@redhat.com>
    Copyright (C) 2001, 2002 Red Hat.

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
    MA 02111-1307, USA
    
  *)
  
  let trim = function
      s -> String.sub s 1 ((String.length s) - 2)

  let unquote = function
      s -> 
	let trimmed = trim s in
	let len = String.length trimmed in
	let qtype_len = String.index trimmed ':' in
	let quote_type = String.sub trimmed 0 qtype_len in
	let quote_val = String.sub trimmed (qtype_len + 1) (len - (qtype_len + 1)) in
	let upper_type = "ANTI_" ^ (String.uppercase quote_type) in
	  (upper_type, quote_val)
	
  let keywords = 
    let h = Hashtbl.create 32 in
      List.iter (fun kw -> Hashtbl.add h kw ())
	[ "auto"; "break"; "case"; "char"; "const"; "continue";
	  "default"; "do"; "double"; "else"; "enum"; "extern"; "float";
	  "for"; "goto"; "if"; "int"; "long"; "register"; "return";
	  "short"; "signed"; "sizeof"; "static"; "struct"; "switch";
	  "typedef"; "union"; "unsigned"; "void"; "volatile"; "while" ];
      h

}

let alpha = ['a'-'z''A'-'Z'] 
let num = ['0'-'9']+ 
let alphanum = alpha | num 
let leading = '_' | alpha
let following = '_' | alphanum
let singles = ['*' '~' '!' '%' '^' '&' ';' ':' '?' '<' '>'
               '[' ']' '(' ')' '{' '}' '-' '+' '=' '|' '.' ',']

let multis = "<<" | ">>" | "==" | "!=" | "+=" | "-="  | "*=" 
           | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>="
           | "<=" | ">=" | "++" | "--" | "->" | "&&"  | "||"

let quote = "expr"   | "stmt" | "stmts" | "decl" | "body" | "func" 
	   | "ident" | "str"  | "int"   | "flo"  | "char" 
	       
let ws = [' ' '\r' '\t' '\n']

rule c_token = parse
    ('-'?)(num+)         { ("INT", Lexing.lexeme lexbuf)            }
  | ('-'?)(num+)('.'num*)?(['e''E']?['+''-']?num+)?
                         { ("FLOAT", Lexing.lexeme lexbuf)          }
  | "0x" ['0'-'9''a'-'f']+
                         { ("INT", Lexing.lexeme lexbuf)            }
  | multis               { ("", Lexing.lexeme lexbuf)               }
  | singles              { ("", Lexing.lexeme lexbuf)               }
  | leading following*   { let str = Lexing.lexeme lexbuf in
			     (if (Hashtbl.mem keywords str) 
			      then ("", str)
			      else ("IDENT", str))                  }
  | eof                  { ("EOI","")                               }
  | '"'("\\\""|[^'"'])*'"'     
                         { ("STRING", (trim (Lexing.lexeme lexbuf)))         }
  | '\'' _ '\''          { ("CHAR", (String.sub (Lexing.lexeme lexbuf) 1 1)) }

  | '$' quote ':' [^'$']+ '$'      { unquote (Lexing.lexeme lexbuf) }
  | _                    { c_token lexbuf                           }
