(* while to Html *)

{
  (* we check about reading on command line *)
  let () =
    if Array.length Sys.argv < 2 || not (Sys.file_exists Sys.argv.(1))
    then begin
      Printf.eprintf
        "usage: while2html file <-nums=true> \n";
      exit 1
    end

  (* we open the input file *)
  let file = Sys.argv.(1)
  let cout = open_out (file ^ ".html")
  let print s = Printf.fprintf cout s

  (* we write the HTML header *)
  let () = print "<html><head><title>%s</title></head><body>\n<pre>" file

  let flag_opt =
    try Sys.argv.(2) = "-nums=true"
    with _ -> false

  let count = ref 0

  (* and count lines *)
  let newline () =
    if flag_opt then (incr count; print "\n%3d: " !count)
    else  print "\n"

  let () = newline ()


(* key words *)
  let key_words = ["if"; "else"; "print"; "while"; "and"; "or"; "not"; "True"; "False"]

 (* is_keyword is a function that checks whether an identifier is a key word *)
  let is_keyword =
    let ht = Hashtbl.create 97 in
    List.iter (fun s -> Hashtbl.add ht s ()) key_words;
    fun s -> Hashtbl.mem ht s

}

(* regular expressions for the identifiers *)

let ident = ['A'-'Z' 'a'-'z' '_'] ['A'-'Z' 'a'-'z' '0'-'9' '_']*

(* scanning function *)

rule scan = parse
  | "#"    { print "<font color=\"990000\">#";
             comment_one_line lexbuf;
             print "</font>";
             newline ();
             scan lexbuf }
  | "(*"   { print "<font color=\"990000\">(*";
             comment lexbuf;
             print "</font>";
             scan lexbuf }

  | eof    { () }
  | ident as s
           { if is_keyword s then begin
               print "<font color=\"green\"><b>%s</b></font>" s
             end else
               print "%s" s;
             scan lexbuf }
  | "<"    { print "&lt;"; scan lexbuf }
  | "&"    { print "&amp;"; scan lexbuf }
  | "\n"   { newline (); scan lexbuf }
  | '"'    { print "\""; string lexbuf; scan lexbuf }
  | "'\"'"
  | _ as s { print "%s" s; scan lexbuf }

(* lexical analysis fo comments *)
and comment_one_line = parse
  | "\n"   { () }
  | "<"    { print "&lt;"; comment_one_line lexbuf }
  | "&"    { print "&amp;"; comment_one_line lexbuf }
  | '"'    { print "\""; string lexbuf; comment_one_line lexbuf }
  | "'\"'"
  | _ as s { print "%s" s; comment_one_line lexbuf }

and comment = parse
  | "(*"   { print "(*"; comment lexbuf; comment lexbuf }
  | "*)"   { print "*)" }
  | eof    { () }
  | "\n"   { newline (); comment lexbuf }
  | '"'    { print "\""; string lexbuf; comment lexbuf }
  | "<"    { print "&lt;"; comment lexbuf }
  | "&"    { print "&amp;"; comment lexbuf }
  | "'\"'"
  | _ as s { print "%s" s; comment lexbuf }

and string = parse
  | '"'    { print "\"" }
  | "<"    { print "&lt;"; string lexbuf }
  | "&"    { print "&amp;"; string lexbuf }
  | "\\" _
  | _ as s { print "%s" s; string lexbuf }

{

  (* we create a lexical analysis buffer, call scan on it,
     the we write the end of HTML file and close the out-channel. *)
  let () =
    let cin = open_in file in
    scan (Lexing.from_channel cin);
    print "</pre>\n</body></html>\n";
    close_out cout;
    close_in cin;

}