diff --git a/lib/linguist/heuristics.rb b/lib/linguist/heuristics.rb index 52ead64e..0087bb51 100644 --- a/lib/linguist/heuristics.rb +++ b/lib/linguist/heuristics.rb @@ -261,5 +261,14 @@ module Linguist Language["Makefile"] end end + + disambiguate "OCaml", "Standard ML" do |data| + if /module|let rec |match\s+(\S+\s)+with/.match(data) + Language["OCaml"] + elsif /=> |case\s+(\S+\s)+of/.match(data) + Language["Standard ML"] + end + end + end end diff --git a/samples/OCaml/cmdliner.ml b/samples/OCaml/cmdliner.ml new file mode 100644 index 00000000..71e49e85 --- /dev/null +++ b/samples/OCaml/cmdliner.ml @@ -0,0 +1,1344 @@ +(*--------------------------------------------------------------------------- + Copyright (c) 2011 Daniel C. Bünzli. All rights reserved. + Distributed under a BSD3 license, see license at the end of the file. + %%NAME%% release %%VERSION%% + ---------------------------------------------------------------------------*) + +(* Invalid_arg strings *) + +let err_argv = "argv array must have at least one element" +let err_not_opt = "Option argument without name" +let err_not_pos = "Positional argument with a name" +let err_help s = "term error, help requested for unknown command " ^ s +let err_empty_list = "empty list" + +(* A few useful definitions. *) + +let rev_compare n n' = compare n' n +let str = Printf.sprintf +let pr = Format.fprintf +let pr_str = Format.pp_print_string +let pr_char = Format.pp_print_char +let str_of_pp pp v = pp Format.str_formatter v; Format.flush_str_formatter () +let quote s = str "`%s'" s +let alts_str ?(quoted = true) alts = + let quote = if quoted then quote else (fun s -> s) in + match alts with + | [] -> invalid_arg err_empty_list + | [a] -> (quote a) + | [a; b] -> str "either %s or %s" (quote a) (quote b) + | alts -> + let rev_alts = List.rev alts in + str "one of %s or %s" + (String.concat ", " (List.rev_map quote (List.tl rev_alts))) + (quote (List.hd rev_alts)) + +let pr_white_str spaces ppf s = (* spaces and new lines with Format's funs *) + let left = ref 0 and right = ref 0 and len = String.length s in + let flush () = + Format.pp_print_string ppf (String.sub s !left (!right - !left)); + incr right; left := !right; + in + while (!right <> len) do + if s.[!right] = '\n' then (flush (); Format.pp_force_newline ppf ()) else + if spaces && s.[!right] = ' ' then (flush (); Format.pp_print_space ppf ()) + else incr right; + done; + if !left <> len then flush () + +let pr_text = pr_white_str true +let pr_lines = pr_white_str false +let pr_to_temp_file pr v = try + let exec = Filename.basename Sys.argv.(0) in + let file, oc = Filename.open_temp_file exec "out" in + let ppf = Format.formatter_of_out_channel oc in + pr ppf v; Format.pp_print_flush ppf (); close_out oc; + at_exit (fun () -> try Sys.remove file with Sys_error e -> ()); + Some file +with Sys_error _ -> None + +(* Levenshtein distance, for making spelling suggestions in case of error. *) + +let levenshtein_distance s t = + (* As found here http://rosettacode.org/wiki/Levenshtein_distance#OCaml *) + let minimum a b c = min a (min b c) in + let m = String.length s in + let n = String.length t in + (* for all i and j, d.(i).(j) will hold the Levenshtein distance between + the first i characters of s and the first j characters of t *) + let d = Array.make_matrix (m+1) (n+1) 0 in + for i = 0 to m do d.(i).(0) <- i done; + for j = 0 to n do d.(0).(j) <- j done; + for j = 1 to n do + for i = 1 to m do + if s.[i-1] = t.[j-1] then + d.(i).(j) <- d.(i-1).(j-1) (* no operation required *) + else + d.(i).(j) <- minimum + (d.(i-1).(j) + 1) (* a deletion *) + (d.(i).(j-1) + 1) (* an insertion *) + (d.(i-1).(j-1) + 1) (* a substitution *) + done; + done; + d.(m).(n) + +let suggest s candidates = + let add (min, acc) name = + let d = levenshtein_distance s name in + if d = min then min, (name :: acc) else + if d < min then d, [name] else + min, acc + in + let dist, suggs = List.fold_left add (max_int, []) candidates in + if dist < 3 (* suggest only if not too far *) then suggs else [] + +(* Tries. This implementation also maps any non ambiguous prefix of a + key to its value. *) + +module Trie : sig + type 'a t + val empty : 'a t + val is_empty : 'a t -> bool + val add : 'a t -> string -> 'a -> 'a t + val find : 'a t -> string -> [ `Ok of 'a | `Ambiguous | `Not_found ] + val ambiguities : 'a t -> string -> string list + val of_list : (string * 'a) list -> 'a t +end = struct + module Cmap = Map.Make (Char) (* character maps. *) + type 'a value = (* type for holding a bound value. *) + | Pre of 'a (* value is bound by the prefix of a key. *) + | Key of 'a (* value is bound by an entire key. *) + | Amb (* no value bound because of ambiguous prefix. *) + | Nil (* not bound (only for the empty trie). *) + + type 'a t = { v : 'a value; succs : 'a t Cmap.t } + let empty = { v = Nil; succs = Cmap.empty } + let is_empty t = t = empty + + (* N.B. If we replace a non-ambiguous key, it becomes ambiguous but it's + not important for our use. Also the following is not tail recursive but + the stack is bounded by key length. *) + let add t k d = + let rec aux t k len i d pre_d = + if i = len then { v = Key d; succs = t.succs } else + let v = match t.v with + | Amb | Pre _ -> Amb | Key _ as v -> v | Nil -> pre_d + in + let succs = + let t' = try Cmap.find k.[i] t.succs with Not_found -> empty in + Cmap.add k.[i] (aux t' k len (i + 1) d pre_d) t.succs + in + { v; succs } + in + aux t k (String.length k) 0 d (Pre d (* allocate less *)) + + let find_node t k = + let rec aux t k len i = + if i = len then t else + aux (Cmap.find k.[i] t.succs) k len (i + 1) + in + aux t k (String.length k) 0 + + let find t k = + try match (find_node t k).v with + | Key v | Pre v -> `Ok v | Amb -> `Ambiguous | Nil -> `Not_found + with Not_found -> `Not_found + + let ambiguities t p = (* ambiguities of [p] in [t]. *) + try + let t = find_node t p in + match t.v with + | Key _ | Pre _ | Nil -> [] + | Amb -> + let add_char s c = s ^ (String.make 1 c) in + let rem_char s = String.sub s 0 ((String.length s) - 1) in + let to_list m = Cmap.fold (fun k t acc -> (k,t) :: acc) m [] in + let rec aux acc p = function + | ((c, t) :: succs) :: rest -> + let p' = add_char p c in + let acc' = match t.v with + | Pre _ | Amb -> acc + | Key _ -> (p' :: acc) + | Nil -> assert false + in + aux acc' p' ((to_list t.succs) :: succs :: rest) + | [] :: [] -> acc + | [] :: rest -> aux acc (rem_char p) rest + | [] -> assert false + in + aux [] p (to_list t.succs :: []) + with Not_found -> [] + + let of_list l = List.fold_left (fun t (s, v) -> add t s v) empty l +end + +(* The following types keep untyped information about arguments and + terms. This data is used to parse the command line, report errors + and format man page information. *) + +type absence = (* what happens if the argument is absent from the cl. *) + | Error (* an error is reported. *) + | Val of string Lazy.t (* if <> "", takes the given default value. *) + +type opt_kind = (* kinds of optional arguments. *) + | Flag (* just a flag, without value. *) + | Opt (* value is required. *) + | Opt_vopt of string (* option value is optional, takes given default. *) + +type pos_kind = (* kinds of positional arguments. *) + | All (* all positional arguments. *) + | Nth of bool * int (* specific position. *) + | Left of bool * int (* all args on the left of a position. *) + | Right of bool * int (* all args on the right of a position. *) + +type arg_info = (* information about a command line argument. *) + { id : int; (* unique id for the argument. *) + absent : absence; (* behaviour if absent. *) + doc : string; (* help. *) + docv : string; (* variable name for the argument in help. *) + docs : string; (* title of help section where listed. *) + p_kind : pos_kind; (* positional arg kind. *) + o_kind : opt_kind; (* optional arg kind. *) + o_names : string list; (* names (for opt args). *) + o_all : bool; } (* repeatable (for opt args). *) + +let arg_id = (* thread-safe UIDs, Oo.id (object end) was used before. *) + let c = ref 0 in + fun () -> + let id = !c in + incr c; if id > !c then assert false (* too many ids *) else id + +let is_opt a = a.o_names <> [] +let is_pos a = a.o_names = [] + +module Amap = Map.Make (* arg info maps. *) + (struct type t = arg_info let compare a a' = compare a.id a'.id end) + +type arg = (* unconverted argument data as found on the command line. *) + | O of (int * string * (string option)) list (* (pos, name, value) of opt. *) + | P of string list + +type cmdline = arg Amap.t (* command line, maps arg_infos to arg value. *) + +type man_block = [ (* block of manpage text. *) + | `S of string | `P of string | `I of string * string | `Noblank ] + +type term_info = + { name : string; (* name of the term. *) + version : string option; (* version (for --version). *) + tdoc : string; (* one line description of term. *) + tdocs : string; (* title of man section where listed (commands). *) + sdocs : string; (* standard options, title of section where listed. *) + man : man_block list; } (* man page text. *) + +type eval_info = (* information about the evaluation context. *) + { term : term_info * arg_info list; (* term being evaluated. *) + main : term_info * arg_info list; (* main term. *) + choices : (term_info * arg_info list) list} (* all term choices. *) + +let eval_kind ei = (* evaluation with multiple terms ? *) + if ei.choices = [] then `Simple else + if (fst ei.term) == (fst ei.main) then `M_main else `M_choice + +module Manpage = struct + type title = string * int * string * string * string + type block = man_block + type t = title * block list + + let p_indent = 7 (* paragraph indentation. *) + let l_indent = 4 (* label indentation. *) + let escape subst esc buf s = + let subst s = + let len = String.length s in + if not (len > 1 && s.[1] = ',') then (subst s) else + if len = 2 then "" else + esc s.[0] (String.sub s 2 (len - 2)) + in + Buffer.clear buf; Buffer.add_substitute buf subst s; + let s = Buffer.contents buf in (* twice for $(i,$(mname)). *) + Buffer.clear buf; Buffer.add_substitute buf subst s; + Buffer.contents buf + + let pr_tokens ?(groff = false) ppf s = + let is_space = function ' ' | '\n' | '\r' | '\t' -> true | _ -> false in + let len = String.length s in + let i = ref 0 in + try while (true) do + while (!i < len && is_space s.[!i]) do incr i done; + let start = !i in + if start = len then raise Exit; + while (!i < len && not (is_space s.[!i]) && not (s.[!i] = '-')) do + incr i + done; + pr_str ppf (String.sub s start (!i - start)); + if !i = len then raise Exit; + if s.[!i] = '-' then + (incr i; if groff then pr_str ppf "\\-" else pr_char ppf '-'); + if (!i < len && is_space s.[!i]) then + (if groff then pr_char ppf ' ' else Format.pp_print_space ppf ()) + done with Exit -> () + + (* Plain text output *) + + let plain_esc c s = match c with 'g' -> "" (* groff specific *) | _ -> s + let pr_indent ppf c = for i = 1 to c do pr_char ppf ' ' done + let pr_plain_blocks subst ppf ts = + let buf = Buffer.create 1024 in + let escape t = escape subst plain_esc buf t in + let pr_tokens ppf t = pr_tokens ppf (escape t) in + let rec aux = function + | [] -> () + | t :: ts -> + begin match t with + | `Noblank -> () + | `P s -> pr ppf "%a@[%a@]@," pr_indent p_indent pr_tokens s + | `S s -> pr ppf "@[%a@]" pr_tokens s + | `I (label, s) -> + let label = escape label in + let ll = String.length label in + pr ppf "@[%a@[%a@]" pr_indent p_indent pr_tokens label; + if s = "" then () else + if ll < l_indent then + pr ppf "%a@[%a@]@]@," pr_indent (l_indent - ll) pr_tokens s + else + pr ppf "@\n%a@[%a@]@]@," + pr_indent (p_indent + l_indent) pr_tokens s + end; + begin match ts with + | `Noblank :: ts -> aux ts + | ts -> Format.pp_print_cut ppf (); aux ts + end + in + aux ts + + let pr_plain_page subst ppf (_, text) = + pr ppf "@[%a@]" (pr_plain_blocks subst) text + + (* Groff output *) + + let groff_esc c s = match c with + | 'i' -> (str "\\fI%s\\fR" s) + | 'b' -> (str "\\fB%s\\fR" s) + | 'p' -> "" (* plain text specific *) + | _ -> s + + let pr_groff_blocks subst ppf text = + let buf = Buffer.create 1024 in + let escape t = escape subst groff_esc buf t in + let pr_tokens ppf t = pr_tokens ~groff:true ppf (escape t) in + let pr_block = function + | `P s -> pr ppf "@\n.P@\n%a" pr_tokens s + | `S s -> pr ppf "@\n.SH %a" pr_tokens s + | `Noblank -> pr ppf "@\n.sp -1" + | `I (l, s) -> pr ppf "@\n.TP 4@\n%a@\n%a" pr_tokens l pr_tokens s + in + List.iter pr_block text + + let pr_groff_page subst ppf ((n, s, a1, a2, a3), t) = + pr ppf ".\\\" Pipe this output to groff -man -Tutf8 | less@\n\ + .\\\"@\n\ + .TH \"%s\" %d \"%s\" \"%s\" \"%s\"@\n\ + .\\\" Disable hyphenantion and ragged-right@\n\ + .nh@\n\ + .ad l\ + %a@?" + n s a1 a2 a3 (pr_groff_blocks subst) t + + (* Printing to a pager *) + + let find_cmd cmds = + let test, null = match Sys.os_type with + | "Win32" -> "where", " NUL" + | _ -> "type", "/dev/null" + in + let cmd c = Sys.command (str "%s %s 1>%s 2>%s" test c null null) = 0 in + try Some (List.find cmd cmds) with Not_found -> None + + let pr_to_pager print ppf v = + let pager = + let cmds = ["less"; "more"] in + let cmds = try (Sys.getenv "PAGER") :: cmds with Not_found -> cmds in + let cmds = try (Sys.getenv "MANPAGER") :: cmds with Not_found -> cmds in + find_cmd cmds + in + match pager with + | None -> print `Plain ppf v + | Some pager -> + let cmd = match (find_cmd ["groff"; "nroff"]) with + | None -> + begin match pr_to_temp_file (print `Plain) v with + | None -> None + | Some f -> Some (str "%s < %s" pager f) + end + | Some c -> + begin match pr_to_temp_file (print `Groff) v with + | None -> None + | Some f -> + (* TODO use -Tutf8, but annoyingly maps U+002D to U+2212. *) + let xroff = if c = "groff" then c ^ " -Tascii -P-c" else c in + Some (str "%s -man < %s | %s" xroff f pager) + end + in + match cmd with + | None -> print `Plain ppf v + | Some cmd -> if (Sys.command cmd) <> 0 then print `Plain ppf v + + let rec print ?(subst = fun x -> x) fmt ppf page = match fmt with + | `Pager -> pr_to_pager (print ~subst) ppf page + | `Plain -> pr_plain_page subst ppf page + | `Groff -> pr_groff_page subst ppf page +end + +module Help = struct + let invocation ?(sep = ' ') ei = match eval_kind ei with + | `Simple | `M_main -> (fst ei.main).name + | `M_choice -> str "%s%c%s" (fst ei.main).name sep (fst ei.term).name + + let title ei = + let prog = String.capitalize (fst ei.main).name in + let name = String.uppercase (invocation ~sep:'-' ei) in + let left_footer = prog ^ match (fst ei.main).version with + | None -> "" | Some v -> str " %s" v + in + let center_header = str "%s Manual" prog in + name, 1, "", left_footer, center_header + + let name_section ei = + let tdoc d = if d = "" then "" else (str " - %s" d) in + [`S "NAME"; `P (str "%s%s" (invocation ~sep:'-' ei) + (tdoc (fst ei.term).tdoc)); ] + + let synopsis ei = match eval_kind ei with + | `M_main -> str "$(b,%s) $(i,COMMAND) ..." (invocation ei) + | `Simple | `M_choice -> + let rev_cmp (p, _) (p', _) = match p', p with (* best effort. *) + | p, All -> -1 | All, p -> 1 + | Left _, Right _ -> -1 | Right _, Left _ -> 1 + | Left (false, k), Nth (false, k') + | Nth (false, k), Nth (false, k') + | Nth (false, k), Right (false, k') -> if k <= k' then -1 else 1 + | Nth (false, k), Left (false, k') + | Right (false, k), Nth (false, k') -> if k >= k' then 1 else -1 + | Left (true, k), Nth (true, k') + | Nth (true, k), Nth (true, k') + | Nth (true, k), Right (true, k') -> if k >= k' then -1 else 1 + | Nth (true, k), Left (true, k') + | Right (true, k), Nth (true, k') -> if k <= k' then 1 else -1 + | p, p' -> compare p p' + in + let rec format_pos acc = function + | a :: al -> + if is_opt a then format_pos acc al else + let v = if a.docv = "" then "$(i,ARG)" else str "$(i,%s)" a.docv in + let v = if a.absent = Error then str "%s" v else str "[%s]" v in + let v = v ^ match a.p_kind with Nth _ -> "" | _ -> "..." in + format_pos ((a.p_kind, v) :: acc) al + | [] -> acc + in + let args = List.sort rev_cmp (format_pos [] (snd ei.term)) in + let args = String.concat " " (List.rev_map snd args) in + str "$(b,%s) [$(i,OPTION)]... %s" (invocation ei) args + + let get_synopsis_section ei = + let rec extract_synopsis syn = function + | `S _ :: _ as man -> List.rev syn, man + | block :: rest -> extract_synopsis (block :: syn) rest + | [] -> List.rev syn, [] + in + match (fst ei.term).man with + | `S "SYNOPSIS" as s :: rest -> extract_synopsis [s] rest (* user-defined *) + | man -> [ `S "SYNOPSIS"; `P (synopsis ei); ], man (* automatic *) + + let make_arg_label a = + if is_pos a then str "$(i,%s)" a.docv else + let fmt_name var = match a.o_kind with + | Flag -> fun n -> str "$(b,%s)" n + | Opt -> + fun n -> + if String.length n > 2 then str "$(b,%s)=$(i,%s)" n var else + str "$(b,%s) $(i,%s)" n var + | Opt_vopt _ -> + fun n -> + if String.length n > 2 then str "$(b,%s)[=$(i,%s)]" n var else + str "$(b,%s) [$(i,%s)]" n var + in + let var = if a.docv = "" then "VAL" else a.docv in + let names = List.sort compare a.o_names in + let s = String.concat ", " (List.rev_map (fmt_name var) names) in + s + + let make_arg_items ei = + let buf = Buffer.create 200 in + let subst_docv docv d = + let subst = function "docv" -> str "$(i,%s)" docv | s -> str "$(%s)" s in + Buffer.clear buf; Buffer.add_substitute buf subst d; Buffer.contents buf + in + let rev_cmp a' a = + let c = compare a.docs a'.docs in + if c <> 0 then c else + match is_opt a, is_opt a' with + | true, true -> + let key names = + let k = String.lowercase (List.hd (List.sort rev_compare names)) in + if k.[1] = '-' then String.sub k 1 (String.length k - 1) else k + in + compare (key a.o_names) (key a'.o_names) + | false, false -> + compare (String.lowercase a.docv) (String.lowercase a'.docv) + | true, false -> -1 + | false, true -> 1 + in + let format a = + let absent = match a.absent with + | Error -> "" + | Val v -> match Lazy.force v with "" -> "" | v -> str "absent=%s" v + in + let optvopt = match a.o_kind with + | Opt_vopt v -> str "default=%s" v + | _ -> "" + in + let argvdoc = match absent, optvopt with + | "", "" -> "" + | s, "" | "", s -> str " (%s)" s + | s, s' -> str " (%s, %s)" s s' + in + (a.docs, `I (make_arg_label a ^ argvdoc, (subst_docv a.docv a.doc))) + in + let is_arg_item a = not (is_pos a && (a.docv = "" || a.doc = "")) in + let l = List.sort rev_cmp (List.filter is_arg_item (snd ei.term)) in + List.rev_map format l + + let make_cmd_items ei = match eval_kind ei with + | `Simple | `M_choice -> [] + | `M_main -> + let add_cmd acc (ti, _) = + (ti.tdocs, `I ((str "$(b,%s)" ti.name), ti.tdoc)) :: acc + in + List.sort rev_compare (List.fold_left add_cmd [] ei.choices) + + let text ei = (* man that code is particulary unreadable. *) + let rec merge_items acc to_insert mark il = function + | `S s as sec :: ts -> + let acc = List.rev_append to_insert acc in + let acc = if mark then sec :: `Orphan_mark :: acc else sec :: acc in + let to_insert, il = List.partition (fun (n, _) -> n = s) il in + let to_insert = List.rev_map (fun (_, i) -> i) to_insert in + let to_insert = (to_insert :> [ `Orphan_mark | Manpage.block] list) in + merge_items acc to_insert (s = "DESCRIPTION") il ts + | t :: ts -> + let t = (t :> [`Orphan_mark | Manpage.block]) in + merge_items (t :: acc) to_insert mark il ts + | [] -> + let acc = List.rev_append to_insert acc in + (if mark then `Orphan_mark :: acc else acc), il + in + let rec merge_orphans acc orphans = function + | `Orphan_mark :: ts -> + let rec merge acc s = function + | [] -> (`S s) :: acc + | (s', i) :: ss -> + let i = (i :> Manpage.block) in + if s = s' then merge (i :: acc) s ss else + merge (i :: (`S s) :: acc) s' ss + in + let acc = match orphans with + | [] -> acc | (s, _) :: _ -> merge acc s orphans + in + merge_orphans acc [] ts + | (#Manpage.block as e) :: ts -> merge_orphans (e :: acc) orphans ts + | [] -> acc + in + let cmds = make_cmd_items ei in + let args = make_arg_items ei in + let cmp (s, _) (s', _) = compare s s' in + let items = List.rev (List.stable_sort cmp (List.rev_append cmds args)) in + let synopsis, man = get_synopsis_section ei in + let rev_text, orphans = + merge_items [`Orphan_mark] [] false items man + in + synopsis @ merge_orphans [] orphans rev_text + + let ei_subst ei = function + | "tname" -> (fst ei.term).name + | "mname" -> (fst ei.main).name + | s -> s + + let man ei = + title ei, (name_section ei) @ (text ei) + + let print fmt ppf ei = Manpage.print ~subst:(ei_subst ei) fmt ppf (man ei) + let pr_synopsis ppf ei = + pr ppf "@[%s@]" + (Manpage.escape (ei_subst ei) + Manpage.plain_esc (Buffer.create 100) (synopsis ei)) + + let pr_version ppf ei = match (fst ei.main).version with + | None -> assert false + | Some v -> pr ppf "@[%a@]@." pr_text v +end + +(* Errors for the command line user *) + +module Err = struct + let invalid kind s exp = str "invalid %s %s, %s" kind (quote s) exp + let invalid_val = invalid "value" + let no kind s = str "no %s %s" (quote s) kind + let not_dir s = str "%s is not a directory" (quote s) + let is_dir s = str "%s is a directory" (quote s) + let element kind s exp = str "invalid element in %s (`%s'): %s" kind s exp + let sep_miss sep s = invalid_val s (str "missing a `%c' separator" sep) + let unknown kind ?(hints = []) v = + let did_you_mean s = str ", did you mean %s ?" s in + let hints = match hints with [] -> "." | hs -> did_you_mean (alts_str hs) in + str "unknown %s %s%s" kind (quote v) hints + + let ambiguous kind s ambs = + str "%s %s ambiguous and could be %s" kind (quote s) (alts_str ambs) + + let pos_excess excess = + str "too many arguments, don't know what to do with %s" + (String.concat ", " (List.map quote excess)) + + let flag_value f v = + str "option %s is a flag, it cannot take the argument %s" + (quote f) (quote v) + + let opt_value_missing f = str "option %s needs an argument" (quote f) + let opt_parse_value f e = str "option %s: %s" (quote f) e + let opt_repeated f f' = + if f = f' then str "option %s cannot be repeated" (quote f) else + str "options %s and %s cannot be present at the same time" (quote f) + (quote f') + + let pos_parse_value a e = + if a.docv = "" then e else match a.p_kind with + | Nth _ -> str "%s argument: %s" a.docv e + | _ -> str "%s... arguments: %s" a.docv e + + let arg_missing a = + if is_opt a then + let rec long_name = function + | n :: l -> if (String.length n) > 2 || l = [] then n else long_name l + | [] -> assert false + in + str "required option %s is missing" (long_name a.o_names) + else + if a.docv = "" then str "a required argument is missing" else + str "required argument %s is missing" a.docv + + (* Error printers *) + + let print ppf ei e = pr ppf "%s: @[%a@]@." (fst ei.main).name pr_text e + let pr_backtrace err ei e bt = + let bt = + let len = String.length bt in + if len > 0 then String.sub bt 0 (len - 1) (* remove final '\n' *) else bt + in + pr err + "%s: @[internal error, uncaught exception:@\n%a@]@." + (fst ei.main).name pr_lines (str "%s\n%s" (Printexc.to_string e) bt) + + let pr_try_help ppf ei = + let exec = Help.invocation ei in + let main = (fst ei.main).name in + if exec = main then + pr ppf "@[<2>Try `%s --help' for more information.@]" exec + else + pr ppf "@[<2>Try `%s --help' or `%s --help' for more information.@]" + exec main + + let pr_usage ppf ei e = + pr ppf "@[%s: @[%a@]@,@[Usage: @[%a@]@]@,%a@]@." + (fst ei.main).name pr_text e Help.pr_synopsis ei pr_try_help ei +end + +(* Command lines. A command line stores pre-parsed information about + the command line's arguments in a more structured way. Given the + [arg_info] values mentionned in a term and Sys.argv (whithout exec + name) we parse the command line into a map of [arg_info] values to + [arg] values. This map is used by the term's closures to retrieve + and convert command line arguments (see the Arg module). *) + +module Cmdline :sig + exception Error of string + val choose_term : term_info -> (term_info * 'a) list -> string list -> + term_info * string list + val create : ?peek_opts:bool -> arg_info list -> string list -> cmdline + val opt_arg : cmdline -> arg_info -> (int * string * (string option)) list + val pos_arg : cmdline -> arg_info -> string list +end = struct + exception Error of string + + let opt_arg cl a = match try Amap.find a cl with Not_found -> assert false + with O l -> l | _ -> assert false + + let pos_arg cl a = match try Amap.find a cl with Not_found -> assert false + with P l -> l | _ -> assert false + + let choose_term ti choices = function + | [] -> ti, [] + | maybe :: args' as args -> + if String.length maybe > 1 && maybe.[0] = '-' then ti, args else + let index = + let add acc (choice, _) = Trie.add acc choice.name choice in + List.fold_left add Trie.empty choices + in + match Trie.find index maybe with + | `Ok choice -> choice, args' + | `Not_found -> + let all = Trie.ambiguities index "" in + let hints = suggest maybe all in + raise (Error (Err.unknown "command" ~hints maybe)) + | `Ambiguous -> + let ambs = List.sort compare (Trie.ambiguities index maybe) in + raise (Error (Err.ambiguous "command" maybe ambs)) + + let arg_info_indexes al = + (* from [al] returns a trie mapping the names of optional arguments to + their arg_info, a list with all arg_info for positional arguments and + a cmdline mapping each arg_info to an empty [arg]. *) + let rec aux opti posi cl = function + | a :: l -> + if is_pos a then aux opti (a :: posi) (Amap.add a (P []) cl) l else + let add t name = Trie.add t name a in + aux (List.fold_left add opti a.o_names) posi (Amap.add a (O []) cl) l + | [] -> opti, posi, cl + in + aux Trie.empty [] Amap.empty al + + let parse_opt_arg s = (* (name,value) of opt arg, assert len > 1. *) + let l = String.length s in + if s.[1] <> '-' then + if l = 2 then s, None else + String.sub s 0 2, Some (String.sub s 2 (l - 2)) + else try + let i = String.index s '=' in + String.sub s 0 i, Some (String.sub s (i + 1) (l - i - 1)) + with Not_found -> s, None + + let parse_args ~peek_opts opti cl args = + (* returns an updated [cl] cmdline according to the options found in [args] + with the trie index [opti]. Positional arguments are returned in order + in a list. *) + let rec aux k opti cl pargs = function + | [] -> cl, (List.rev pargs) + | "--" :: args -> cl, (List.rev_append pargs args) + | s :: args -> + let is_opt s = String.length s > 1 && s.[0] = '-' in + let is_short_opt s = String.length s = 2 && s.[0] = '-' in + if not (is_opt s) then aux (k+1) opti cl (s :: pargs) args else + let name, value = parse_opt_arg s in + match Trie.find opti name with + | `Ok a -> + let value, args = match value, a.o_kind with + | Some v, Flag when is_short_opt name -> None, ("-" ^ v) :: args + | Some v, _ -> value, args + | None, Flag -> value, args + | None, _ -> + match args with + | v :: rest -> if is_opt v then None, args else Some v, rest + | [] -> None, args + in + let arg = O ((k, name, value) :: opt_arg cl a) in + aux (k+1) opti (Amap.add a arg cl) pargs args + | `Not_found when peek_opts -> aux (k+1) opti cl pargs args (* skip *) + | `Not_found -> + let hints = + if String.length s <= 2 then [] else + let short_opt, long_opt = + if s.[1] <> '-' + then s, Printf.sprintf "-%s" s + else String.sub s 1 (String.length s - 1), s + in + let short_opt, _ = parse_opt_arg short_opt in + let long_opt, _ = parse_opt_arg long_opt in + let all = Trie.ambiguities opti "-" in + match List.mem short_opt all, suggest long_opt all with + | false, [] -> [] + | false, l -> l + | true, [] -> [short_opt] + | true, l -> if List.mem short_opt l then l else short_opt :: l + in + raise (Error (Err.unknown "option" ~hints name)) + | `Ambiguous -> + let ambs = List.sort compare (Trie.ambiguities opti name) in + raise (Error (Err.ambiguous "option" name ambs)) + in + aux 0 opti cl [] args + + let process_pos_args posi cl pargs = + (* returns an updated [cl] cmdline in which each positional arg mentionned + in the list index posi, is given a value according the list + of positional arguments values [pargs]. *) + if pargs = [] then cl else + let rec take n acc l = + if n = 0 then List.rev acc else + take (n - 1) (List.hd l :: acc) (List.tl l) + in + let rec aux pargs last cl max_spec = function + | a :: al -> + let arg, max_spec = match a.p_kind with + | All -> P pargs, last + | Nth (rev, k) -> + let k = if rev then last - k else k in + let max_spec = max k max_spec in + if k < 0 || k > last then P [], max_spec else + P ([List.nth pargs k]), max_spec + | Left (rev, k) -> + let k = if rev then last - k else k in + let max_spec = max k max_spec in + if k <= 0 || k > last then P [], max_spec else + P (take k [] pargs), max_spec + | Right (rev, k) -> + let k = if rev then last - k else k in + if k < 0 || k >= last then P [], last else + P (List.rev (take (last - k) [] (List.rev pargs))), last + in + aux pargs last (Amap.add a arg cl) max_spec al + | [] -> cl, max_spec + in + let last = List.length pargs - 1 in + let cl, max_spec = aux pargs last cl (-1) posi in + if last <= max_spec then cl else + let excess = List.rev (take (last - max_spec) [] (List.rev pargs)) in + raise (Error (Err.pos_excess excess)) + + let create ?(peek_opts = false) al args = + let opti, posi, cl = arg_info_indexes al in + let cl, pargs = parse_args ~peek_opts opti cl args in + if peek_opts then cl (* skip positional arguments *) else + process_pos_args posi cl pargs +end + +module Arg = struct + type 'a parser = string -> [ `Ok of 'a | `Error of string ] + type 'a printer = Format.formatter -> 'a -> unit + type 'a converter = 'a parser * 'a printer + type 'a arg_converter = (eval_info -> cmdline -> 'a) + type 'a t = arg_info list * 'a arg_converter + type info = arg_info + + let ( & ) f x = f x + let parse_error e = raise (Cmdline.Error e) + let some ?(none = "") (parse, print) = + (fun s -> match parse s with `Ok v -> `Ok (Some v) | `Error _ as e -> e), + (fun ppf v -> match v with None -> pr_str ppf none| Some v -> print ppf v) + + let info ?docs ?(docv = "") ?(doc = "") names = + let dash n = if String.length n = 1 then "-" ^ n else "--" ^ n in + let docs = match docs with + | None -> if names = [] then "ARGUMENTS" else "OPTIONS" + | Some s -> s + in + { id = arg_id (); absent = Val (Lazy.from_val ""); + doc = doc; docv = docv; docs = docs; + p_kind = All; o_kind = Flag; o_names = List.rev_map dash names; + o_all = false; } + + let flag a = + if is_pos a then invalid_arg err_not_opt else + let convert _ cl = match Cmdline.opt_arg cl a with + | [] -> false + | [_, _, None] -> true + | [_, f, Some v] -> parse_error (Err.flag_value f v) + | (_, f, _) :: (_ ,g, _) :: _ -> parse_error (Err.opt_repeated f g) + in + [a], convert + + let flag_all a = + if is_pos a then invalid_arg err_not_opt else + let a = { a with o_all = true } in + let convert _ cl = match Cmdline.opt_arg cl a with + | [] -> [] + | l -> + let truth (_, f, v) = match v with + | None -> true | Some v -> parse_error (Err.flag_value f v) + in + List.rev_map truth l + in + [a], convert + + let vflag v l = + let convert _ cl = + let rec aux fv = function + | (v, a) :: rest -> + begin match Cmdline.opt_arg cl a with + | [] -> aux fv rest + | [_, f, None] -> + begin match fv with + | None -> aux (Some (f, v)) rest + | Some (g, _) -> parse_error (Err.opt_repeated g f) + end + | [_, f, Some v] -> parse_error (Err.flag_value f v) + | (_, f, _) :: (_, g, _) :: _ -> parse_error (Err.opt_repeated g f) + end + | [] -> match fv with None -> v | Some (_, v) -> v + in + aux None l + in + let flag (_, a) = if is_pos a then invalid_arg err_not_opt else a in + List.rev_map flag l, convert + + let vflag_all v l = + let convert _ cl = + let rec aux acc = function + | (fv, a) :: rest -> + begin match Cmdline.opt_arg cl a with + | [] -> aux acc rest + | l -> + let fval (k, f, v) = match v with + | None -> (k, fv) | Some v -> parse_error (Err.flag_value f v) + in + aux (List.rev_append (List.rev_map fval l) acc) rest + end + | [] -> + if acc = [] then v else List.rev_map snd (List.sort rev_compare acc) + in + aux [] l + in + let flag (_, a) = + if is_pos a then invalid_arg err_not_opt else { a with o_all = true } + in + List.rev_map flag l, convert + + let parse_opt_value parse f v = match parse v with + | `Ok v -> v | `Error e -> parse_error (Err.opt_parse_value f e) + + let opt ?vopt (parse, print) v a = + if is_pos a then invalid_arg err_not_opt else + let a = { a with absent = Val (lazy (str_of_pp print v)); + o_kind = match vopt with + | None -> Opt | Some dv -> Opt_vopt (str_of_pp print dv) } + in + let convert _ cl = match Cmdline.opt_arg cl a with + | [] -> v + | [_, f, Some v] -> parse_opt_value parse f v + | [_, f, None] -> + begin match vopt with + | None -> parse_error (Err.opt_value_missing f) + | Some optv -> optv + end + | (_, f, _) :: (_, g, _) :: _ -> parse_error (Err.opt_repeated g f) + in + [a], convert + + let opt_all ?vopt (parse, print) v a = + if is_pos a then invalid_arg err_not_opt else + let a = { a with absent = Val (Lazy.from_val ""); o_all = true; + o_kind = match vopt with + | None -> Opt | Some dv -> Opt_vopt (str_of_pp print dv) } + in + let convert _ cl = match Cmdline.opt_arg cl a with + | [] -> v + | l -> + let parse (k, f, v) = match v with + | Some v -> (k, parse_opt_value parse f v) + | None -> match vopt with + | None -> parse_error (Err.opt_value_missing f) + | Some dv -> (k, dv) + in + List.rev_map snd (List.sort rev_compare (List.rev_map parse l)) + in + [a], convert + + (* Positional arguments *) + + let parse_pos_value parse a v = match parse v with + | `Ok v -> v | `Error e -> parse_error (Err.pos_parse_value a e) + + let pos ?(rev = false) k (parse, print) v a = + if is_opt a then invalid_arg err_not_pos else + let a = { a with p_kind = Nth (rev, k); + absent = Val (Lazy.from_val (str_of_pp print v)) } + in + let convert _ cl = match Cmdline.pos_arg cl a with + | [] -> v + | [v] -> parse_pos_value parse a v + | _ -> assert false + in + [a], convert + + let pos_list kind (parse, _) v a = + if is_opt a then invalid_arg err_not_pos else + let a = { a with p_kind = kind } in + let convert _ cl = match Cmdline.pos_arg cl a with + | [] -> v + | l -> List.rev (List.rev_map (parse_pos_value parse a) l) + in + [a], convert + + let pos_all c v a = pos_list All c v a + let pos_left ?(rev = false) k = pos_list (Left (rev, k)) + let pos_right ?(rev = false) k = pos_list (Right (rev, k)) + + (* Arguments as terms *) + + let absent_error al = List.rev_map (fun a -> { a with absent = Error }) al + let value a = a + let required (al, convert) = + let al = absent_error al in + let convert ei cl = match convert ei cl with + | Some v -> v + | None -> parse_error (Err.arg_missing (List.hd al)) + in + al, convert + + let non_empty (al, convert) = + let al = absent_error al in + let convert ei cl = match convert ei cl with + | [] -> parse_error (Err.arg_missing (List.hd al)) + | l -> l + in + al, convert + + let last (al, convert) = + let convert ei cl = match convert ei cl with + | [] -> parse_error (Err.arg_missing (List.hd al)) + | l -> List.hd (List.rev l) + in + al, convert + + (* Predefined converters. *) + + let bool = + (fun s -> try `Ok (bool_of_string s) with Invalid_argument _ -> + `Error (Err.invalid_val s (alts_str ["true"; "false"]))), + Format.pp_print_bool + + let char = + (fun s -> if String.length s = 1 then `Ok s.[0] else + `Error (Err.invalid_val s "expected a character")), + pr_char + + let parse_with t_of_str exp s = + try `Ok (t_of_str s) with Failure _ -> `Error (Err.invalid_val s exp) + + let int = + parse_with int_of_string "expected an integer", Format.pp_print_int + + let int32 = + parse_with Int32.of_string "expected a 32-bit integer", + (fun ppf -> pr ppf "%ld") + + let int64 = + parse_with Int64.of_string "expected a 64-bit integer", + (fun ppf -> pr ppf "%Ld") + + let nativeint = + parse_with Nativeint.of_string "expected a processor-native integer", + (fun ppf -> pr ppf "%nd") + + let float = + parse_with float_of_string "expected a floating point number", + Format.pp_print_float + + let string = (fun s -> `Ok s), pr_str + let enum sl = + if sl = [] then invalid_arg err_empty_list else + let sl_inv = List.rev_map (fun (s,v) -> (v,s)) sl in + let print ppf v = pr_str ppf (List.assoc v sl_inv) in + let t = Trie.of_list sl in + let parse s = match Trie.find t s with + | `Ok _ as r -> r + | `Ambiguous -> + let ambs = List.sort compare (Trie.ambiguities t s) in + `Error (Err.ambiguous "enum value" s ambs) + | `Not_found -> + let alts = List.rev (List.rev_map (fun (s, _) -> s) sl) in + `Error (Err.invalid_val s ("expected " ^ (alts_str alts))) + in + parse, print + + let file = + (fun s -> if Sys.file_exists s then `Ok s else + `Error (Err.no "file or directory" s)), + pr_str + + let dir = + (fun s -> + if Sys.file_exists s then + if Sys.is_directory s then `Ok s else `Error (Err.not_dir s) + else + `Error (Err.no "directory" s)), + pr_str + + let non_dir_file = + (fun s -> + if Sys.file_exists s then + if not (Sys.is_directory s) then `Ok s else `Error (Err.is_dir s) + else + `Error (Err.no "file" s)), + pr_str + + let split_and_parse sep parse s = + let parse sub = match parse sub with + | `Error e -> failwith e | `Ok v -> v in + let rec split accum j = + let i = try String.rindex_from s j sep with Not_found -> -1 in + if (i = -1) then + let p = String.sub s 0 (j + 1) in + if p <> "" then parse p :: accum else accum + else + let p = String.sub s (i + 1) (j - i) in + let accum' = if p <> "" then parse p :: accum else accum in + split accum' (i - 1) + in + split [] (String.length s - 1) + + let list ?(sep = ',') (parse, pr_e) = + let parse s = try `Ok (split_and_parse sep parse s) with + | Failure e -> `Error (Err.element "list" s e) + in + let rec print ppf = function + | v :: l -> pr_e ppf v; if (l <> []) then (pr_char ppf sep; print ppf l) + | [] -> () + in + parse, print + + let array ?(sep = ',') (parse, pr_e) = + let parse s = try `Ok (Array.of_list (split_and_parse sep parse s)) with + | Failure e -> `Error (Err.element "array" s e) + in + let print ppf v = + let max = Array.length v - 1 in + for i = 0 to max do pr_e ppf v.(i); if i <> max then pr_char ppf sep done + in + parse, print + + let split_left sep s = + try + let i = String.index s sep in + let len = String.length s in + Some ((String.sub s 0 i), (String.sub s (i + 1) (len - i - 1))) + with Not_found -> None + + let pair ?(sep = ',') (pa0, pr0) (pa1, pr1) = + let parser s = match split_left sep s with + | None -> `Error (Err.sep_miss sep s) + | Some (v0, v1) -> + match pa0 v0, pa1 v1 with + | `Ok v0, `Ok v1 -> `Ok (v0, v1) + | `Error e, _ | _, `Error e -> `Error (Err.element "pair" s e) + in + let printer ppf (v0, v1) = pr ppf "%a%c%a" pr0 v0 sep pr1 v1 in + parser, printer + + let t2 = pair + let t3 ?(sep = ',') (pa0, pr0) (pa1, pr1) (pa2, pr2) = + let parse s = match split_left sep s with + | None -> `Error (Err.sep_miss sep s) + | Some (v0, s) -> + match split_left sep s with + | None -> `Error (Err.sep_miss sep s) + | Some (v1, v2) -> + match pa0 v0, pa1 v1, pa2 v2 with + | `Ok v0, `Ok v1, `Ok v2 -> `Ok (v0, v1, v2) + | `Error e, _, _ | _, `Error e, _ | _, _, `Error e -> + `Error (Err.element "triple" s e) + in + let print ppf (v0, v1, v2) = + pr ppf "%a%c%a%c%a" pr0 v0 sep pr1 v1 sep pr2 v2 + in + parse, print + + let t4 ?(sep = ',') (pa0, pr0) (pa1, pr1) (pa2, pr2) (pa3, pr3) = + let parse s = match split_left sep s with + | None -> `Error (Err.sep_miss sep s) + | Some(v0, s) -> + match split_left sep s with + | None -> `Error (Err.sep_miss sep s) + | Some (v1, s) -> + match split_left sep s with + | None -> `Error (Err.sep_miss sep s) + | Some (v2, v3) -> + match pa0 v0, pa1 v1, pa2 v2, pa3 v3 with + | `Ok v1, `Ok v2, `Ok v3, `Ok v4 -> `Ok (v1, v2, v3, v4) + | `Error e, _, _, _ | _, `Error e, _, _ | _, _, `Error e, _ + | _, _, _, `Error e -> `Error (Err.element "quadruple" s e) + in + let print ppf (v0, v1, v2, v3) = + pr ppf "%a%c%a%c%a%c%a" pr0 v0 sep pr1 v1 sep pr2 v2 sep pr3 v3 + in + parse, print + + (* Documentation formatting helpers *) + + let doc_quote = quote + let doc_alts = alts_str + let doc_alts_enum ?quoted enum = alts_str ?quoted (List.map fst enum) +end + +module Term = struct + type info = term_info + type +'a t = arg_info list * (eval_info -> cmdline -> 'a) + type 'a result = [ + | `Ok of 'a | `Error of [`Parse | `Term | `Exn ] | `Version | `Help ] + + exception Term of + [ `Help of [`Pager | `Plain | `Groff] * string option + | `Error of bool * string ] + + let info ?(sdocs = "OPTIONS") ?(man = []) ?(docs = "COMMANDS") ?(doc = "") + ?version name = + { name = name; version = version; tdoc = doc; tdocs = docs; sdocs = sdocs; + man = man } + + let name ti = ti.name + let pure v = [], (fun _ _ -> v) + let app (al, f) (al', v) = + List.rev_append al al', + fun ei cl -> (f ei cl) (v ei cl) + + let ( $ ) = app + + type 'a ret = + [ `Help of [`Pager | `Plain | `Groff] * string option + | `Error of (bool * string) + | `Ok of 'a ] + + let ret (al, v) = + al, fun ei cl -> match v ei cl with + | `Ok v -> v + | `Error (u,e) -> raise (Term (`Error (u,e))) + | `Help h -> raise (Term (`Help h)) + + let main_name = [], (fun ei _ -> (fst ei.main).name) + let choice_names = + [], fun ei _ -> List.rev_map (fun e -> (fst e).name) ei.choices + + let man_format = + let fmts = ["pager", `Pager; "groff", `Groff; "plain", `Plain] in + let doc = "Show output in format $(docv) (pager, plain or groff)."in + Arg.(value & opt (enum fmts) `Pager & info ["man-format"] ~docv:"FMT" ~doc) + + (* Evaluation *) + + let remove_exec argv = + try List.tl (Array.to_list argv) with Failure _ -> invalid_arg err_argv + + let add_std_opts ei = + let docs = (fst ei.term).sdocs in + let args, v_lookup = + if (fst ei.main).version = None then [], None else + let (a, lookup) = + Arg.flag (Arg.info ["version"] ~docs ~doc:"Show version information.") + in + a, Some lookup + in + let args, h_lookup = + let (a, lookup) = + let fmt = Arg.enum ["pager",`Pager; "groff",`Groff; "plain",`Plain] in + let doc = "Show this help in format $(docv) (pager, plain or groff)."in + let a = Arg.info ["help"] ~docv:"FMT" ~docs ~doc in + Arg.opt ~vopt:(Some `Pager) (Arg.some fmt) None a + in + List.rev_append a args, lookup + in + h_lookup, v_lookup, + { ei with term = (fst ei.term), List.rev_append args (snd ei.term) } + + let eval_term help err ei f args = + let help_arg, vers_arg, ei = add_std_opts ei in + try + let cl = Cmdline.create (snd ei.term) args in + match help_arg ei cl, vers_arg with + | Some fmt, _ -> Help.print fmt help ei; `Help + | None, Some v_arg when v_arg ei cl -> Help.pr_version help ei; `Version + | _ -> `Ok (f ei cl) + with + | Cmdline.Error e -> Err.pr_usage err ei e; `Error `Parse + | Term (`Error (usage, e)) -> + if usage then Err.pr_usage err ei e else Err.print err ei e; + `Error `Term + | Term (`Help (fmt, cmd)) -> + let ei = match cmd with + | Some cmd -> + let cmd = + try List.find (fun (i, _) -> i.name = cmd) ei.choices + with Not_found -> invalid_arg (err_help cmd) + in + {ei with term = cmd } + | None -> { ei with term = ei.main } + in + let _, _, ei = add_std_opts ei in + Help.print fmt help ei; `Help + + let eval ?(help = Format.std_formatter) ?(err = Format.err_formatter) + ?(catch = true) ?(argv = Sys.argv) ((al, f), ti) = + let term = ti, al in + let ei = { term = term; main = term; choices = [] } in + try eval_term help err ei f (remove_exec argv) with + | e when catch -> + Err.pr_backtrace err ei e (Printexc.get_backtrace ()); `Error `Exn + + let eval_choice ?(help = Format.std_formatter) ?(err = Format.err_formatter) + ?(catch = true) ?(argv = Sys.argv) (((al, f) as t), ti) choices = + let ei_choices = List.rev_map (fun ((al, _), ti) -> ti, al) choices in + let main = (ti, al) in + let ei = { term = main; main = main; choices = ei_choices } in + try + let chosen, args = Cmdline.choose_term ti ei_choices (remove_exec argv) in + let find_chosen (_, ti) = ti = chosen in + let (al, f), _ = List.find find_chosen ((t, ti) :: choices) in + let ei = { ei with term = (chosen, al) } in + eval_term help err ei f args + with + | Cmdline.Error e -> (* may be raised by choose_term. *) + Err.pr_usage err ei e; `Error `Parse + | e when catch -> + Err.pr_backtrace err ei e (Printexc.get_backtrace ()); `Error `Exn + + let eval_peek_opts ?(version_opt = false) ?(argv = Sys.argv) (al, f) = + let args = remove_exec argv in + let version = if version_opt then Some "dummy" else None in + let term = info ?version "dummy", al in + let ei = { term = term; main = term; choices = [] } in + let help_arg, vers_arg, ei = add_std_opts ei in + try + let cl = Cmdline.create ~peek_opts:true (snd ei.term) args in + match help_arg ei cl, vers_arg with + | Some fmt, _ -> + (try (Some (f ei cl), `Help) with e -> None, `Help) + | None, Some v_arg when v_arg ei cl -> + (try (Some (f ei cl), `Version) with e -> None, `Version) + | _ -> + let v = f ei cl in + Some v, `Ok v + with + | Cmdline.Error _ -> None, (`Error `Parse) + | Term _ -> None, (`Error `Term) + | e -> None, (`Error `Exn) +end + +(*--------------------------------------------------------------------------- + Copyright (c) 2011 Daniel C. Bünzli + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + 3. Neither the name of Daniel C. Bünzli nor the names of + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ---------------------------------------------------------------------------*) diff --git a/samples/OCaml/common.ml b/samples/OCaml/common.ml new file mode 100644 index 00000000..8f3a9260 --- /dev/null +++ b/samples/OCaml/common.ml @@ -0,0 +1,14 @@ +(* + * Copyright (c) 2013 Jeremy Yallop. + * + * This file is distributed under the terms of the MIT License. + * See the file LICENSE for details. + *) + +let string_of format v = + let buf = Buffer.create 100 in + let fmt = Format.formatter_of_buffer buf in begin + format fmt v; + Format.pp_print_flush fmt (); + Buffer.contents buf + end diff --git a/samples/OCaml/date.ml b/samples/OCaml/date.ml new file mode 100644 index 00000000..70f46d3c --- /dev/null +++ b/samples/OCaml/date.ml @@ -0,0 +1,40 @@ +(* + * Copyright (c) 2013 Jeremy Yallop. + * + * This file is distributed under the terms of the MIT License. + * See the file LICENSE for details. + *) + +open Ctypes +open PosixTypes +open Foreign + +type tm +let tm = structure "tm" +let (-:) ty label = field tm label ty +let tm_sec = int -: "tm_sec" (* seconds *) +let tm_min = int -: "tm_min" (* minutes *) +let tm_hour = int -: "tm_hour" (* hours *) +let tm_mday = int -: "tm_mday" (* day of the month *) +let tm_mon = int -: "tm_mon" (* month *) +let tm_year = int -: "tm_year" (* year *) +let tm_wday = int -: "tm_wday" (* day of the week *) +let tm_yday = int -: "tm_yday" (* day in the year *) +let tm_isdst = int -: "tm_isdst" (* daylight saving time *) +let () = seal (tm : tm structure typ) + +let time = foreign "time" ~check_errno:true (ptr time_t @-> returning time_t) + +let asctime = foreign "asctime" (ptr tm @-> returning string) + +let localtime = foreign "localtime" (ptr time_t @-> returning (ptr tm)) + +let () = begin + let timep = allocate_n ~count:1 time_t in + let time = time timep in + assert (time = !@timep); + let tm = localtime timep in + Printf.printf "tm.tm_mon = %d\n" (getf !@tm tm_mon); + Printf.printf "tm.tm_year = %d\n" (getf !@tm tm_year); + print_endline (asctime tm) +end diff --git a/samples/OCaml/map.ml b/samples/OCaml/map.ml new file mode 100644 index 00000000..7d65bc6b --- /dev/null +++ b/samples/OCaml/map.ml @@ -0,0 +1,337 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1996 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the GNU Library General Public License, with *) +(* the special exception on linking described in file ../LICENSE. *) +(* *) +(***********************************************************************) + +module type OrderedType = + sig + type t + val compare: t -> t -> int + end + +module type S = + sig + type key + type +'a t + val empty: 'a t + val is_empty: 'a t -> bool + val mem: key -> 'a t -> bool + val add: key -> 'a -> 'a t -> 'a t + val singleton: key -> 'a -> 'a t + val remove: key -> 'a t -> 'a t + val merge: + (key -> 'a option -> 'b option -> 'c option) -> 'a t -> 'b t -> 'c t + val compare: ('a -> 'a -> int) -> 'a t -> 'a t -> int + val equal: ('a -> 'a -> bool) -> 'a t -> 'a t -> bool + val iter: (key -> 'a -> unit) -> 'a t -> unit + val fold: (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b + val for_all: (key -> 'a -> bool) -> 'a t -> bool + val exists: (key -> 'a -> bool) -> 'a t -> bool + val filter: (key -> 'a -> bool) -> 'a t -> 'a t + val partition: (key -> 'a -> bool) -> 'a t -> 'a t * 'a t + val cardinal: 'a t -> int + val bindings: 'a t -> (key * 'a) list + val min_binding: 'a t -> (key * 'a) + val max_binding: 'a t -> (key * 'a) + val choose: 'a t -> (key * 'a) + val split: key -> 'a t -> 'a t * 'a option * 'a t + val find: key -> 'a t -> 'a + val map: ('a -> 'b) -> 'a t -> 'b t + val mapi: (key -> 'a -> 'b) -> 'a t -> 'b t + end + +module Make(Ord: OrderedType) = struct + + type key = Ord.t + + type 'a t = + Empty + | Node of 'a t * key * 'a * 'a t * int + + let height = function + Empty -> 0 + | Node(_,_,_,_,h) -> h + + let create l x d r = + let hl = height l and hr = height r in + Node(l, x, d, r, (if hl >= hr then hl + 1 else hr + 1)) + + let singleton x d = Node(Empty, x, d, Empty, 1) + + let bal l x d r = + let hl = match l with Empty -> 0 | Node(_,_,_,_,h) -> h in + let hr = match r with Empty -> 0 | Node(_,_,_,_,h) -> h in + if hl > hr + 2 then begin + match l with + Empty -> invalid_arg "Map.bal" + | Node(ll, lv, ld, lr, _) -> + if height ll >= height lr then + create ll lv ld (create lr x d r) + else begin + match lr with + Empty -> invalid_arg "Map.bal" + | Node(lrl, lrv, lrd, lrr, _)-> + create (create ll lv ld lrl) lrv lrd (create lrr x d r) + end + end else if hr > hl + 2 then begin + match r with + Empty -> invalid_arg "Map.bal" + | Node(rl, rv, rd, rr, _) -> + if height rr >= height rl then + create (create l x d rl) rv rd rr + else begin + match rl with + Empty -> invalid_arg "Map.bal" + | Node(rll, rlv, rld, rlr, _) -> + create (create l x d rll) rlv rld (create rlr rv rd rr) + end + end else + Node(l, x, d, r, (if hl >= hr then hl + 1 else hr + 1)) + + let empty = Empty + + let is_empty = function Empty -> true | _ -> false + + let rec add x data = function + Empty -> + Node(Empty, x, data, Empty, 1) + | Node(l, v, d, r, h) -> + let c = Ord.compare x v in + if c = 0 then + Node(l, x, data, r, h) + else if c < 0 then + bal (add x data l) v d r + else + bal l v d (add x data r) + + let rec find x = function + Empty -> + raise Not_found + | Node(l, v, d, r, _) -> + let c = Ord.compare x v in + if c = 0 then d + else find x (if c < 0 then l else r) + + let rec mem x = function + Empty -> + false + | Node(l, v, d, r, _) -> + let c = Ord.compare x v in + c = 0 || mem x (if c < 0 then l else r) + + let rec min_binding = function + Empty -> raise Not_found + | Node(Empty, x, d, r, _) -> (x, d) + | Node(l, x, d, r, _) -> min_binding l + + let rec max_binding = function + Empty -> raise Not_found + | Node(l, x, d, Empty, _) -> (x, d) + | Node(l, x, d, r, _) -> max_binding r + + let rec remove_min_binding = function + Empty -> invalid_arg "Map.remove_min_elt" + | Node(Empty, x, d, r, _) -> r + | Node(l, x, d, r, _) -> bal (remove_min_binding l) x d r + + let merge t1 t2 = + match (t1, t2) with + (Empty, t) -> t + | (t, Empty) -> t + | (_, _) -> + let (x, d) = min_binding t2 in + bal t1 x d (remove_min_binding t2) + + let rec remove x = function + Empty -> + Empty + | Node(l, v, d, r, h) -> + let c = Ord.compare x v in + if c = 0 then + merge l r + else if c < 0 then + bal (remove x l) v d r + else + bal l v d (remove x r) + + let rec iter f = function + Empty -> () + | Node(l, v, d, r, _) -> + iter f l; f v d; iter f r + + let rec map f = function + Empty -> + Empty + | Node(l, v, d, r, h) -> + let l' = map f l in + let d' = f d in + let r' = map f r in + Node(l', v, d', r', h) + + let rec mapi f = function + Empty -> + Empty + | Node(l, v, d, r, h) -> + let l' = mapi f l in + let d' = f v d in + let r' = mapi f r in + Node(l', v, d', r', h) + + let rec fold f m accu = + match m with + Empty -> accu + | Node(l, v, d, r, _) -> + fold f r (f v d (fold f l accu)) + + let rec for_all p = function + Empty -> true + | Node(l, v, d, r, _) -> p v d && for_all p l && for_all p r + + let rec exists p = function + Empty -> false + | Node(l, v, d, r, _) -> p v d || exists p l || exists p r + + (* Beware: those two functions assume that the added k is *strictly* + smaller (or bigger) than all the present keys in the tree; it + does not test for equality with the current min (or max) key. + + Indeed, they are only used during the "join" operation which + respects this precondition. + *) + + let rec add_min_binding k v = function + | Empty -> singleton k v + | Node (l, x, d, r, h) -> + bal (add_min_binding k v l) x d r + + let rec add_max_binding k v = function + | Empty -> singleton k v + | Node (l, x, d, r, h) -> + bal l x d (add_max_binding k v r) + + (* Same as create and bal, but no assumptions are made on the + relative heights of l and r. *) + + let rec join l v d r = + match (l, r) with + (Empty, _) -> add_min_binding v d r + | (_, Empty) -> add_max_binding v d l + | (Node(ll, lv, ld, lr, lh), Node(rl, rv, rd, rr, rh)) -> + if lh > rh + 2 then bal ll lv ld (join lr v d r) else + if rh > lh + 2 then bal (join l v d rl) rv rd rr else + create l v d r + + (* Merge two trees l and r into one. + All elements of l must precede the elements of r. + No assumption on the heights of l and r. *) + + let concat t1 t2 = + match (t1, t2) with + (Empty, t) -> t + | (t, Empty) -> t + | (_, _) -> + let (x, d) = min_binding t2 in + join t1 x d (remove_min_binding t2) + + let concat_or_join t1 v d t2 = + match d with + | Some d -> join t1 v d t2 + | None -> concat t1 t2 + + let rec split x = function + Empty -> + (Empty, None, Empty) + | Node(l, v, d, r, _) -> + let c = Ord.compare x v in + if c = 0 then (l, Some d, r) + else if c < 0 then + let (ll, pres, rl) = split x l in (ll, pres, join rl v d r) + else + let (lr, pres, rr) = split x r in (join l v d lr, pres, rr) + + let rec merge f s1 s2 = + match (s1, s2) with + (Empty, Empty) -> Empty + | (Node (l1, v1, d1, r1, h1), _) when h1 >= height s2 -> + let (l2, d2, r2) = split v1 s2 in + concat_or_join (merge f l1 l2) v1 (f v1 (Some d1) d2) (merge f r1 r2) + | (_, Node (l2, v2, d2, r2, h2)) -> + let (l1, d1, r1) = split v2 s1 in + concat_or_join (merge f l1 l2) v2 (f v2 d1 (Some d2)) (merge f r1 r2) + | _ -> + assert false + + let rec filter p = function + Empty -> Empty + | Node(l, v, d, r, _) -> + (* call [p] in the expected left-to-right order *) + let l' = filter p l in + let pvd = p v d in + let r' = filter p r in + if pvd then join l' v d r' else concat l' r' + + let rec partition p = function + Empty -> (Empty, Empty) + | Node(l, v, d, r, _) -> + (* call [p] in the expected left-to-right order *) + let (lt, lf) = partition p l in + let pvd = p v d in + let (rt, rf) = partition p r in + if pvd + then (join lt v d rt, concat lf rf) + else (concat lt rt, join lf v d rf) + + type 'a enumeration = End | More of key * 'a * 'a t * 'a enumeration + + let rec cons_enum m e = + match m with + Empty -> e + | Node(l, v, d, r, _) -> cons_enum l (More(v, d, r, e)) + + let compare cmp m1 m2 = + let rec compare_aux e1 e2 = + match (e1, e2) with + (End, End) -> 0 + | (End, _) -> -1 + | (_, End) -> 1 + | (More(v1, d1, r1, e1), More(v2, d2, r2, e2)) -> + let c = Ord.compare v1 v2 in + if c <> 0 then c else + let c = cmp d1 d2 in + if c <> 0 then c else + compare_aux (cons_enum r1 e1) (cons_enum r2 e2) + in compare_aux (cons_enum m1 End) (cons_enum m2 End) + + let equal cmp m1 m2 = + let rec equal_aux e1 e2 = + match (e1, e2) with + (End, End) -> true + | (End, _) -> false + | (_, End) -> false + | (More(v1, d1, r1, e1), More(v2, d2, r2, e2)) -> + Ord.compare v1 v2 = 0 && cmp d1 d2 && + equal_aux (cons_enum r1 e1) (cons_enum r2 e2) + in equal_aux (cons_enum m1 End) (cons_enum m2 End) + + let rec cardinal = function + Empty -> 0 + | Node(l, _, _, r, _) -> cardinal l + 1 + cardinal r + + let rec bindings_aux accu = function + Empty -> accu + | Node(l, v, d, r, _) -> bindings_aux ((v, d) :: bindings_aux accu r) l + + let bindings s = + bindings_aux [] s + + let choose = min_binding + +end diff --git a/samples/OCaml/mirage.ml b/samples/OCaml/mirage.ml new file mode 100644 index 00000000..4878abed --- /dev/null +++ b/samples/OCaml/mirage.ml @@ -0,0 +1,2503 @@ +(* + * Copyright (c) 2013 Thomas Gazagnaire + * Copyright (c) 2013 Anil Madhavapeddy + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + *) + +open Mirage_misc + +module StringSet = struct + + include Set.Make(String) + + let of_list l = + let s = ref empty in + List.iter (fun e -> s := add e !s) l; + !s + +end + +let main_ml = ref None + +let append_main fmt = + match !main_ml with + | None -> failwith "main_ml" + | Some oc -> append oc fmt + +let newline_main () = + match !main_ml with + | None -> failwith "main_ml" + | Some oc -> newline oc + +let set_main_ml file = + let oc = open_out file in + main_ml := Some oc + +type mode = [ + | `Unix + | `Xen + | `MacOSX +] + +let string_of_mode = + function + | `Unix -> "Unix" + | `Xen -> "Xen" + | `MacOSX -> "MacOS X" + +let mode : mode ref = ref `Unix + +let set_mode m = + mode := m + +let get_mode () = + !mode + +type _ typ = + | Type: 'a -> 'a typ + | Function: 'a typ * 'b typ -> ('a -> 'b) typ + +let (@->) f t = + Function (f, t) + +module type CONFIGURABLE = sig + type t + val name: t -> string + val module_name: t -> string + val packages: t -> string list + val libraries: t -> string list + val configure: t -> unit + val clean: t -> unit + val update_path: t -> string -> t +end + + +module TODO (N: sig val name: string end) = struct + + let todo str = + failwith (Printf.sprintf "TODO: %s.%s" N.name str) + + let name _ = + todo "name" + + let module_name _ = + todo "module_name" + + let packages _ = + todo "packages" + + let libraries _ = + todo "libraries" + + let configure _ = + todo "configure" + + let clean _ = + todo "clean" + + let update_path _ = + todo "update_path" + +end + +type ('a, 'b) base = { + typ: 'a typ; + t: 'b; + m: (module CONFIGURABLE with type t = 'b); +} + +type 'a foreign = { + name: string; + ty: 'a typ; + libraries: string list; + packages: string list; +} + +type _ impl = + | Impl: ('a, 'b) base -> 'a impl (* base implementation *) + | App: ('a, 'b) app -> 'b impl (* functor application *) + | Foreign: 'a foreign -> 'a impl (* foreign functor implementation *) + +and ('a, 'b) app = { + f: ('a -> 'b) impl; (* functor *) + x: 'a impl; (* parameter *) +} + +let rec string_of_impl: type a. a impl -> string = function + | Impl { t; m = (module M) } -> Printf.sprintf "Impl (%s)" (M.module_name t) + | Foreign { name } -> Printf.sprintf "Foreign (%s)" name + | App { f; x } -> Printf.sprintf "App (%s, %s)" (string_of_impl f) (string_of_impl x) + +type 'a folder = { + fn: 'b. 'a -> 'b impl -> 'a +} + +let rec fold: type a. 'b folder -> a impl -> 'b -> 'b = + fun fn t acc -> + match t with + | Impl _ + | Foreign _ -> fn.fn acc t + | App {f; x} -> fold fn f (fn.fn acc x) + +type iterator = { + i: 'b. 'b impl -> unit +} + +let rec iter: type a. iterator -> a impl -> unit = + fun fn t -> + match t with + | Impl _ + | Foreign _ -> fn.i t + | App {f; x} -> iter fn f; iter fn x; fn.i x + +let driver_initialisation_error name = + Printf.sprintf "fail (Failure %S)" name + +module Name = struct + + let ids = Hashtbl.create 1024 + + let names = Hashtbl.create 1024 + + let create name = + let n = + try 1 + Hashtbl.find ids name + with Not_found -> 1 in + Hashtbl.replace ids name n; + Printf.sprintf "%s%d" name n + + let of_key key ~base = + find_or_create names key (fun () -> create base) + +end + +module Impl = struct + + (* get the left-most module name (ie. the name of the functor). *) + let rec functor_name: type a. a impl -> string = function + | Impl { t; m = (module M) } -> M.module_name t + | Foreign { name } -> name + | App { f } -> functor_name f + + (* return a unique variable name holding the state of the given + module construction. *) + let rec name: type a. a impl -> string = function + | Impl { t; m = (module M) } -> M.name t + | Foreign { name } -> Name.of_key ("f" ^ name) ~base:"f" + | App _ as t -> Name.of_key (module_name t) ~base:"t" + + (* return a unique module name holding the implementation of the + given module construction. *) + and module_name: type a. a impl -> string = function + | Impl { t; m = (module M) } -> M.module_name t + | Foreign { name } -> name + | App { f; x } -> + let name = match module_names f @ [module_name x] with + | [] -> assert false + | [m] -> m + | h::t -> h ^ String.concat "" (List.map (Printf.sprintf "(%s)") t) + in + Name.of_key name ~base:"M" + + and module_names: type a. a impl -> string list = + function t -> + let fn = { + fn = fun acc t -> module_name t :: acc + } in + fold fn t [] + + let rec names: type a. a impl -> string list = function + | Foreign _ -> [] + | Impl _ as t -> [name t] + | App {f=Foreign f; x} -> names x + | App {f; x} -> (names f) @ [name x] + + let configured = Hashtbl.create 31 + + let rec configure: type a. a impl -> unit = + fun t -> + let name = name t in + if not (Hashtbl.mem configured name) then ( + Hashtbl.add configured name true; + match t with + | Impl { t; m = (module M) } -> M.configure t + | Foreign _ -> () + | App {f; x} as app -> + configure_app f; + configure_app x; + iter { i=configure } app; + let name = module_name app in + let body = cofind Name.names name in + append_main "module %s = %s" name body; + newline_main (); + ) + + and configure_app: type a. a impl -> unit = function + | Impl _ + | Foreign _ -> () + | App _ as t -> + let name = name t in + configure t; + begin match names t with + | [n] -> append_main "let %s = %s" name n + | names -> + append_main "let %s () =" name; + List.iter (fun n -> + append_main " %s () >>= function" n; + append_main " | `Error e -> %s" (driver_initialisation_error n); + append_main " | `Ok %s ->" n; + ) names; + append_main " return (`Ok (%s))" (String.concat ", " names) + end; + newline_main () + + let rec packages: type a. a impl -> string list = function + | Impl { t; m = (module M) } -> M.packages t + | Foreign { packages } -> packages + | App {f; x} -> packages f @ packages x + + let rec libraries: type a. a impl -> string list = function + | Impl { t; m = (module M) } -> M.libraries t + | Foreign { libraries } -> libraries + | App {f; x} -> libraries f @ libraries x + + let rec clean: type a. a impl -> unit = function + | Impl { t; m = (module M) } -> M.clean t + | Foreign _ -> () + | App {f; x} -> clean f; clean x + + let rec update_path: type a. a impl -> string -> a impl = + fun t root -> match t with + | Impl b -> + let module M = (val b.m) in Impl { b with t = M.update_path b.t root } + | Foreign _ -> t + | App {f; x} -> App { f = update_path f root; x = update_path x root } + +end + +let impl typ t m = + Impl { typ; t; m } + +let implementation typ t m = + let typ = Type typ in + Impl { typ; t; m } + +let ($) f x = + App { f; x } + +let foreign name ?(libraries=[]) ?(packages=[]) ty = + Foreign { name; ty; libraries; packages } + +let rec typ: type a. a impl -> a typ = function + | Impl { typ } -> typ + | Foreign { ty } -> ty + | App { f } -> match typ f with Function (_, b) -> b | _ -> assert false + + +module Io_page = struct + + (** Memory allocation interface. *) + + type t = unit + + let name () = + "io_page" + + let module_name () = + "Io_page" + + let packages () = [ + "io-page" + ] + + let libraries () = + match !mode with + | `Xen -> ["io-page"] + | `Unix | `MacOSX -> ["io-page"; "io-page.unix"] + + let configure () = () + + let clean () = () + + let update_path () _ = () + +end + +type io_page = IO_PAGE + +let io_page = Type IO_PAGE + +let default_io_page: io_page impl = + impl io_page () (module Io_page) + +module Time = struct + + (** OS Timer. *) + + type t = unit + + let name () = + "time" + + let module_name () = + "OS.Time" + + let packages () = [] + + let libraries () = [] + + let configure () = () + + let clean () = () + + let update_path () _ = () + +end + +type time = TIME + +let time = Type TIME + +let default_time: time impl = + impl time () (module Time) + +module Clock = struct + + (** Clock operations. *) + + type t = unit + + let name () = + "clock" + + let module_name () = + "Clock" + + let packages () = [ + match !mode with + | `Unix | `MacOSX -> "mirage-clock-unix" + | `Xen -> "mirage-clock-xen" + ] + + let libraries () = packages () + + let configure () = + append_main "let clock () = return (`Ok ())"; + newline_main () + + let clean () = () + + let update_path () _ = () + +end + +type clock = CLOCK + +let clock = Type CLOCK + +let default_clock: clock impl = + impl clock () (module Clock) + +module Random = struct + + type t = unit + + let name () = + "random" + + let module_name () = + "Random" + + let packages () = [] + + let libraries () = [] + + let configure () = + append_main "let random () = return (`Ok ())"; + newline_main () + + let clean () = () + + let update_path () _ = () + +end + +type random = RANDOM + +let random = Type RANDOM + +let default_random: random impl = + impl random () (module Random) + +module Entropy = struct + + type t = unit + + let name _ = + "entropy" + + let module_name () = "Entropy" + + let construction () = + match !mode with + | `Unix | `MacOSX -> "Entropy_unix.Make (OS.Time)" + | `Xen -> "Entropy_xen" + + let packages () = + match !mode with + | `Unix | `MacOSX -> [ "mirage-entropy-unix" ] + | `Xen -> [ "mirage-entropy-xen" ] + + let libraries = packages + + let configure t = + append_main "module %s = %s" (module_name t) (construction t) ; + newline_main () ; + append_main "let %s () =" (name t); + append_main " %s.connect ()" (module_name t); + newline_main () + + let clean () = () + + let update_path t _ = t + +end + +type entropy = ENTROPY + +let entropy = Type ENTROPY + +let default_entropy: entropy impl = + impl entropy () (module Entropy) + +module Console = struct + + type t = string + + let name t = + Name.of_key ("console" ^ t) ~base:"console" + + let module_name t = + "Console" + + let construction () = + match !mode with + | `Unix | `MacOSX -> "Console_unix" + | `Xen -> "Console_xen" + + let packages _ = + match !mode with + | `Unix | `MacOSX -> ["mirage-console"; "mirage-unix"] + | `Xen -> ["mirage-console"; "xenstore"; "mirage-xen"; "xen-gnt"; "xen-evtchn"] + + let libraries _ = + match !mode with + | `Unix | `MacOSX -> ["mirage-console.unix"] + | `Xen -> ["mirage-console.xen"] + + let configure t = + append_main "module %s = %s" (module_name t) (construction ()); + newline_main (); + append_main "let %s () =" (name t); + append_main " %s.connect %S" (module_name t) t; + newline_main () + + let clean _ = + () + + let update_path t _ = + t + +end + +type console = CONSOLE + +let console = Type CONSOLE + +let default_console: console impl = + impl console "0" (module Console) + +let custom_console: string -> console impl = + fun str -> + impl console str (module Console) + +module Crunch = struct + + type t = string + + let name t = + Name.of_key ("static" ^ t) ~base:"static" + + let module_name t = + String.capitalize (name t) + + let packages _ = [ + "mirage-types"; + "lwt"; + "cstruct"; + "crunch"; + ] @ Io_page.packages () + + let libraries _ = [ + "mirage-types"; + "lwt"; + "cstruct"; + ] @ Io_page.libraries () + + let ml t = + Printf.sprintf "%s.ml" (name t) + + let mli t = + Printf.sprintf "%s.mli" (name t) + + let configure t = + if not (command_exists "ocaml-crunch") then + error "ocaml-crunch not found, stopping."; + let file = ml t in + if Sys.file_exists t then ( + info "%s %s" (blue_s "Generating:") (Sys.getcwd () / file); + command "ocaml-crunch -o %s %s" file t + ) else + error "The directory %s does not exist." t; + append_main "let %s () =" (name t); + append_main " %s.connect ()" (module_name t); + newline_main () + + let clean t = + remove (ml t); + remove (mli t) + + let update_path t root = + if Sys.file_exists (root / t) then + root / t + else + t + +end + +type kv_ro = KV_RO + +let kv_ro = Type KV_RO + +let crunch dirname = + impl kv_ro dirname (module Crunch) + +module Direct_kv_ro = struct + + include Crunch + + let module_name t = + match !mode with + | `Xen -> Crunch.module_name t + | `Unix | `MacOSX -> "Kvro_fs_unix" + + let packages t = + match !mode with + | `Xen -> Crunch.packages t + | `Unix | `MacOSX -> "mirage-fs-unix" :: Crunch.packages t + + let libraries t = + match !mode with + | `Xen -> Crunch.libraries t + | `Unix | `MacOSX -> "mirage-fs-unix" :: Crunch.libraries t + + let configure t = + match !mode with + | `Xen -> Crunch.configure t + | `Unix | `MacOSX -> + append_main "let %s () =" (name t); + append_main " Kvro_fs_unix.connect %S" t + +end + +let direct_kv_ro dirname = + impl kv_ro dirname (module Direct_kv_ro) + +module Block = struct + + type t = string + + let name t = + Name.of_key ("block" ^ t) ~base:"block" + + let module_name _ = + "Block" + + let packages _ = [ + match !mode with + | `Unix | `MacOSX -> "mirage-block-unix" + | `Xen -> "mirage-block-xen" + ] + + let libraries _ = [ + match !mode with + | `Unix | `MacOSX -> "mirage-block-unix" + | `Xen -> "mirage-block-xen.front" + ] + + let configure t = + append_main "let %s () =" (name t); + append_main " %s.connect %S" (module_name t) t; + newline_main () + + let clean t = + () + + let update_path t root = + if Sys.file_exists (root / t) then + root / t + else + t + +end + +type block = BLOCK + +let block = Type BLOCK + +let block_of_file filename = + impl block filename (module Block) + +module Fat = struct + + type t = { + io_page: io_page impl; + block : block impl; + } + + let name t = + let key = "fat" ^ Impl.name t.io_page ^ Impl.name t.block in + Name.of_key key ~base:"fat" + + let module_name t = + String.capitalize (name t) + + let packages t = + "fat-filesystem" + :: Impl.packages t.io_page + @ Impl.packages t.block + + let libraries t = + "fat-filesystem" + :: Impl.libraries t.io_page + @ Impl.libraries t.block + + let configure t = + Impl.configure t.io_page; + Impl.configure t.block; + append_main "module %s = Fat.Fs.Make(%s)(%s)" + (module_name t) + (Impl.module_name t.block) + (Impl.module_name t.io_page); + newline_main (); + let name = name t in + append_main "let %s () =" name; + append_main " %s () >>= function" (Impl.name t.block); + append_main " | `Error _ -> %s" (driver_initialisation_error name); + append_main " | `Ok dev -> %s.connect dev" (module_name t); + newline_main () + + let clean t = + Impl.clean t.block; + Impl.clean t.io_page + + let update_path t root = + { io_page = Impl.update_path t.io_page root; + block = Impl.update_path t.block root; + } + +end + +type fs = FS + +let fs = Type FS + +let fat ?(io_page=default_io_page) block: fs impl = + let t = { Fat.block; io_page } in + impl fs t (module Fat) + +(* This would deserve to be in its own lib. *) +let kv_ro_of_fs x: kv_ro impl = + let dummy_fat = fat (block_of_file "xx") in + let libraries = Impl.libraries dummy_fat in + let packages = Impl.packages dummy_fat in + let fn = foreign "Fat.KV_RO.Make" ~libraries ~packages (fs @-> kv_ro) in + fn $ x + +module Fat_of_files = struct + + type t = { + dir : string option; + regexp: string; + } + + let name t = + Name.of_key + ("fat" ^ (match t.dir with None -> "." | Some d -> d) ^ ":" ^ t.regexp) + ~base:"fat" + + let module_name t = + String.capitalize (name t) + + let block_file t = + name t ^ ".img" + + let block t = + block_of_file (block_file t) + + let packages t = + Impl.packages (fat (block t)) + + let libraries t = + Impl.libraries (fat (block t)) + + let configure t = + let fat = fat (block t) in + Impl.configure fat; + append_main "module %s = %s" (module_name t) (Impl.module_name fat); + append_main "let %s = %s" (name t) (Impl.name fat); + newline_main (); + let file = Printf.sprintf "make-%s-image.sh" (name t) in + let oc = open_out file in + append oc "#!/bin/sh"; + append oc ""; + append oc "echo This uses the 'fat' command-line tool to build a simple FAT"; + append oc "echo filesystem image."; + append oc ""; + append oc "FAT=$(which fat)"; + append oc "if [ ! -x \"${FAT}\" ]; then"; + append oc " echo I couldn\\'t find the 'fat' command-line tool."; + append oc " echo Try running 'opam install fat-filesystem'"; + append oc " exit 1"; + append oc "fi"; + append oc ""; + append oc "IMG=$(pwd)/%s" (block_file t); + append oc "rm -f ${IMG}"; + (match t.dir with None -> () | Some d -> append oc "cd %s/" d); + append oc "SIZE=$(du -s . | cut -f 1)"; + append oc "${FAT} create ${IMG} ${SIZE}KiB"; + append oc "${FAT} add ${IMG} %s" t.regexp; + append oc "echo Created '%s'" (block_file t); + + close_out oc; + Unix.chmod file 0o755; + command "./make-%s-image.sh" (name t) + + let clean t = + command "rm -f make-%s-image.sh %s" (name t) (block_file t); + Impl.clean (block t) + + let update_path t root = + match t.dir with + | None -> t + | Some d -> { t with dir = Some (root / d) } + +end + +let fat_of_files: ?dir:string -> ?regexp:string -> unit -> fs impl = + fun ?dir ?regexp () -> + let regexp = match regexp with + | None -> "*" + | Some r -> r in + impl fs { Fat_of_files.dir; regexp } (module Fat_of_files) + +type network_config = Tap0 | Custom of string + +module Network = struct + + type t = network_config + + let name t = + "net_" ^ match t with + | Tap0 -> "tap0" + | Custom s -> s + + let module_name _ = + "Netif" + + let packages t = + match !mode with + | `Unix -> ["mirage-net-unix"] + | `MacOSX -> ["mirage-net-macosx"] + | `Xen -> ["mirage-net-xen"] + + let libraries t = + packages t + + let configure t = + append_main "let %s () =" (name t); + append_main " %s.connect %S" + (module_name t) + (match t with Tap0 -> "tap0" | Custom s -> s); + newline_main () + + let clean _ = + () + + let update_path t _ = + t + +end + +type network = NETWORK + +let network = Type NETWORK + +let tap0 = + impl network Tap0 (module Network) + +let netif dev = + impl network (Custom dev) (module Network) + +module Ethif = struct + + type t = network impl + + let name t = + Name.of_key ("ethif" ^ Impl.name t) ~base:"ethif" + + let module_name t = + String.capitalize (name t) + + let packages t = + Impl.packages t @ ["tcpip"] + + let libraries t = + Impl.libraries t @ + match !mode with + | `Unix | `MacOSX -> [ "tcpip.ethif-unix" ] + | `Xen -> [ "tcpip.ethif" ] + + let configure t = + let name = name t in + Impl.configure t; + append_main "module %s = Ethif.Make(%s)" (module_name t) (Impl.module_name t); + newline_main (); + append_main "let %s () =" name; + append_main " %s () >>= function" (Impl.name t); + append_main " | `Error _ -> %s" (driver_initialisation_error name); + append_main " | `Ok eth -> %s.connect eth" (module_name t); + newline_main () + + let clean t = + Impl.clean t + + let update_path t root = + Impl.update_path t root + +end + +type ethernet = ETHERNET + +let ethernet = Type ETHERNET + +let etif network = + impl ethernet network (module Ethif) + +type ('ipaddr, 'prefix) ip_config = { + address: 'ipaddr; + netmask: 'prefix; + gateways: 'ipaddr list; +} + +type ipv4_config = (Ipaddr.V4.t, Ipaddr.V4.t) ip_config + +let meta_ipv4_config t = + Printf.sprintf "(Ipaddr.V4.of_string_exn %S, Ipaddr.V4.of_string_exn %S, [%s])" + (Ipaddr.V4.to_string t.address) + (Ipaddr.V4.to_string t.netmask) + (String.concat "; " + (List.map (Printf.sprintf "Ipaddr.V4.of_string_exn %S") + (List.map Ipaddr.V4.to_string t.gateways))) + +module IPV4 = struct + + type t = { + ethernet: ethernet impl; + config : ipv4_config; + } + (* XXX: should the type if ipv4.id be ipv4.t ? + N.connect ethif |> N.set_ip up *) + + let name t = + let key = "ipv4" ^ Impl.name t.ethernet ^ meta_ipv4_config t.config in + Name.of_key key ~base:"ipv4" + + let module_name t = + String.capitalize (name t) + + let packages t = + "tcpip" :: Impl.packages t.ethernet + + let libraries t = + (match !mode with + | `Unix | `MacOSX -> [ "tcpip.ipv4-unix" ] + | `Xen -> [ "tcpip.ipv4" ]) + @ Impl.libraries t.ethernet + + let configure t = + let name = name t in + let mname = module_name t in + Impl.configure t.ethernet; + append_main "module %s = Ipv4.Make(%s)" + (module_name t) (Impl.module_name t.ethernet); + newline_main (); + append_main "let %s () =" name; + append_main " %s () >>= function" (Impl.name t.ethernet); + append_main " | `Error _ -> %s" (driver_initialisation_error name); + append_main " | `Ok eth ->"; + append_main " %s.connect eth >>= function" mname; + append_main " | `Error _ -> %s" (driver_initialisation_error "IPV4"); + append_main " | `Ok ip ->"; + append_main " let i = Ipaddr.V4.of_string_exn in"; + append_main " %s.set_ip ip (i %S) >>= fun () ->" + mname (Ipaddr.V4.to_string t.config.address); + append_main " %s.set_ip_netmask ip (i %S) >>= fun () ->" + mname (Ipaddr.V4.to_string t.config.netmask); + append_main " %s.set_ip_gateways ip [%s] >>= fun () ->" + mname + (String.concat "; " + (List.map + (fun n -> Printf.sprintf "(i %S)" (Ipaddr.V4.to_string n)) + t.config.gateways)); + append_main " return (`Ok ip)"; + newline_main () + + let clean t = + Impl.clean t.ethernet + + let update_path t root = + { t with ethernet = Impl.update_path t.ethernet root } + +end + +type ipv6_config = (Ipaddr.V6.t, Ipaddr.V6.Prefix.t list) ip_config + +let meta_ipv6_config t = + Printf.sprintf "(Ipaddr.V6.of_string_exn %S, [%s], [%s])" + (Ipaddr.V6.to_string t.address) + (String.concat "; " + (List.map (Printf.sprintf "Ipaddr.V6.Prefix.of_string_exn %S") + (List.map Ipaddr.V6.Prefix.to_string t.netmask))) + (String.concat "; " + (List.map (Printf.sprintf "Ipaddr.V6.of_string_exn %S") + (List.map Ipaddr.V6.to_string t.gateways))) + +module IPV6 = struct + + type t = { + time : time impl; + clock : clock impl; + ethernet: ethernet impl; + config : ipv6_config; + } + (* XXX: should the type if ipv4.id be ipv4.t ? + N.connect ethif |> N.set_ip up *) + + let name t = + let key = "ipv6" ^ Impl.name t.time ^ Impl.name t.clock ^ Impl.name t.ethernet ^ meta_ipv6_config t.config in + Name.of_key key ~base:"ipv6" + + let module_name t = + String.capitalize (name t) + + let packages t = + "tcpip" :: Impl.packages t.time @ Impl.packages t.clock @ Impl.packages t.ethernet + + let libraries t = + (match !mode with + | `Unix | `MacOSX -> [ "tcpip.ipv6-unix" ] + | `Xen -> [ "tcpip.ipv6" ]) + @ Impl.libraries t.time @ Impl.libraries t.clock @ Impl.libraries t.ethernet + + let configure t = + let name = name t in + let mname = module_name t in + Impl.configure t.ethernet; + append_main "module %s = Ipv6.Make(%s)(%s)(%s)" + (module_name t) (Impl.module_name t.ethernet) (Impl.module_name t.time) (Impl.module_name t.clock); + newline_main (); + append_main "let %s () =" name; + append_main " %s () >>= function" (Impl.name t.ethernet); + append_main " | `Error _ -> %s" (driver_initialisation_error name); + append_main " | `Ok eth ->"; + append_main " %s.connect eth >>= function" mname; + append_main " | `Error _ -> %s" (driver_initialisation_error name); + append_main " | `Ok ip ->"; + append_main " let i = Ipaddr.V6.of_string_exn in"; + append_main " %s.set_ip ip (i %S) >>= fun () ->" + mname (Ipaddr.V6.to_string t.config.address); + List.iter begin fun netmask -> + append_main " %s.set_ip_netmask ip (i %S) >>= fun () ->" + mname (Ipaddr.V6.Prefix.to_string netmask) + end t.config.netmask; + append_main " %s.set_ip_gateways ip [%s] >>= fun () ->" + mname + (String.concat "; " + (List.map + (fun n -> Printf.sprintf "(i %S)" (Ipaddr.V6.to_string n)) + t.config.gateways)); + append_main " return (`Ok ip)"; + newline_main () + + let clean t = + Impl.clean t.time; + Impl.clean t.clock; + Impl.clean t.ethernet + + let update_path t root = + { t with + time = Impl.update_path t.time root; + clock = Impl.update_path t.clock root; + ethernet = Impl.update_path t.ethernet root } + +end + +type v4 +type v6 + +type 'a ip = IP + +let ip = Type IP + +type ipv4 = v4 ip +type ipv6 = v6 ip + +let ipv4 : ipv4 typ = ip +let ipv6 : ipv6 typ = ip + +let create_ipv4 net config = + let etif = etif net in + let t = { + IPV4.ethernet = etif; + config } in + impl ipv4 t (module IPV4) + +let default_ipv4_conf = + let i = Ipaddr.V4.of_string_exn in + { + address = i "10.0.0.2"; + netmask = i "255.255.255.0"; + gateways = [i "10.0.0.1"]; + } + +let default_ipv4 net = + create_ipv4 net default_ipv4_conf + +let create_ipv6 + ?(time = default_time) + ?(clock = default_clock) + net config = + let etif = etif net in + let t = { + IPV6.ethernet = etif; + time; clock; + config + } in + impl ipv6 t (module IPV6) + +module UDP_direct (V : sig type t end) = struct + + type t = V.t ip impl + + let name t = + Name.of_key ("udp" ^ Impl.name t) ~base:"udp" + + let module_name t = + String.capitalize (name t) + + let packages t = + Impl.packages t @ [ "tcpip" ] + + let libraries t = + Impl.libraries t @ [ "tcpip.udp" ] + + let configure t = + let name = name t in + Impl.configure t; + append_main "module %s = Udp.Make(%s)" (module_name t) (Impl.module_name t); + newline_main (); + append_main "let %s () =" name; + append_main " %s () >>= function" (Impl.name t); + append_main " | `Error _ -> %s" (driver_initialisation_error name); + append_main " | `Ok ip -> %s.connect ip" (module_name t); + newline_main () + + let clean t = + Impl.clean t + + let update_path t root = + Impl.update_path t root + +end + +module UDPV4_socket = struct + + type t = Ipaddr.V4.t option + + let name _ = "udpv4_socket" + + let module_name _ = "Udpv4_socket" + + let packages t = [ "tcpip" ] + + let libraries t = + match !mode with + | `Unix | `MacOSX -> [ "tcpip.udpv4-socket" ] + | `Xen -> failwith "No socket implementation available for Xen" + + let configure t = + append_main "let %s () =" (name t); + let ip = match t with + | None -> "None" + | Some ip -> + Printf.sprintf "Some (Ipaddr.V4.of_string_exn %s)" (Ipaddr.V4.to_string ip) + in + append_main " %s.connect %S" (module_name t) ip; + newline_main () + + let clean t = + () + + let update_path t root = + t + +end + +type 'a udp = UDP + +type udpv4 = v4 udp +type udpv6 = v6 udp + +let udp = Type UDP +let udpv4 : udpv4 typ = udp +let udpv6 : udpv6 typ = udp + +let direct_udp (type v) (ip : v ip impl) = + impl udp ip (module UDP_direct (struct type t = v end)) + +let socket_udpv4 ip = + impl udpv4 ip (module UDPV4_socket) + +module TCP_direct (V : sig type t end) = struct + + type t = { + clock : clock impl; + time : time impl; + ip : V.t ip impl; + random: random impl; + } + + let name t = + let key = "tcp" + ^ Impl.name t.clock + ^ Impl.name t.time + ^ Impl.name t.ip in + Name.of_key key ~base:"tcp" + + let module_name t = + String.capitalize (name t) + + let packages t = + "tcpip" + :: Impl.packages t.clock + @ Impl.packages t.time + @ Impl.packages t.ip + @ Impl.packages t.random + + let libraries t = + "tcpip.tcp" + :: Impl.libraries t.clock + @ Impl.libraries t.time + @ Impl.libraries t.ip + @ Impl.libraries t.random + + let configure t = + let name = name t in + Impl.configure t.clock; + Impl.configure t.time; + Impl.configure t.ip; + Impl.configure t.random; + append_main "module %s = Tcp.Flow.Make(%s)(%s)(%s)(%s)" + (module_name t) + (Impl.module_name t.ip) + (Impl.module_name t.time) + (Impl.module_name t.clock) + (Impl.module_name t.random); + newline_main (); + append_main "let %s () =" name; + append_main " %s () >>= function" (Impl.name t.ip); + append_main " | `Error _ -> %s" (driver_initialisation_error (Impl.name t.ip)); + append_main " | `Ok ip -> %s.connect ip" (module_name t); + newline_main () + + let clean t = + Impl.clean t.clock; + Impl.clean t.time; + Impl.clean t.ip; + Impl.clean t.random + + let update_path t root = + { clock = Impl.update_path t.clock root; + ip = Impl.update_path t.ip root; + time = Impl.update_path t.time root; + random = Impl.update_path t.random root; + } + +end + +module TCPV4_socket = struct + + type t = Ipaddr.V4.t option + + let name _ = "tcpv4_socket" + + let module_name _ = "Tcpv4_socket" + + let packages t = [ "tcpip" ] + + let libraries t = + match !mode with + | `Unix | `MacOSX -> [ "tcpip.tcpv4-socket" ] + | `Xen -> failwith "No socket implementation available for Xen" + + let configure t = + append_main "let %s () =" (name t); + let ip = match t with + | None -> "None" + | Some ip -> + Printf.sprintf "Some (Ipaddr.V4.of_string_exn %s)" (Ipaddr.V4.to_string ip) + in + append_main " %s.connect %S" (module_name t) ip; + newline_main () + + let clean t = + () + + let update_path t root = + t + +end + +type 'a tcp = TCP + +type tcpv4 = v4 tcp +type tcpv6 = v6 tcp + +let tcp = Type TCP +let tcpv4 : tcpv4 typ = tcp +let tcpv6 : tcpv6 typ = tcp + +let direct_tcp (type v) + ?(clock=default_clock) ?(random=default_random) ?(time=default_time) (ip : v ip impl) = + let module TCP_direct = TCP_direct (struct type t = v end) in + let t = { TCP_direct.clock; random; time; ip } in + impl tcp t (module TCP_direct) + +let socket_tcpv4 ip = + impl tcpv4 ip (module TCPV4_socket) + +module STACKV4_direct = struct + + type t = { + clock : clock impl; + time : time impl; + console: console impl; + network: network impl; + random : random impl; + config : [`DHCP | `IPV4 of ipv4_config]; + } + + let name t = + let key = "stackv4" + ^ Impl.name t.clock + ^ Impl.name t.time + ^ Impl.name t.console + ^ Impl.name t.network + ^ Impl.name t.random + ^ match t.config with + | `DHCP -> "dhcp" + | `IPV4 i -> meta_ipv4_config i in + Name.of_key key ~base:"stackv4" + + let module_name t = + String.capitalize (name t) + + let packages t = + "tcpip" + :: Impl.packages t.clock + @ Impl.packages t.time + @ Impl.packages t.console + @ Impl.packages t.network + @ Impl.packages t.random + + let libraries t = + "tcpip.stack-direct" + :: "mirage.runtime" + :: Impl.libraries t.clock + @ Impl.libraries t.time + @ Impl.libraries t.console + @ Impl.libraries t.network + @ Impl.libraries t.random + + let configure t = + let name = name t in + Impl.configure t.clock; + Impl.configure t.time; + Impl.configure t.console; + Impl.configure t.network; + Impl.configure t.random; + append_main "module %s = struct" (module_name t); + append_main " module E = Ethif.Make(%s)" (Impl.module_name t.network); + append_main " module I = Ipv4.Make(E)"; + append_main " module U = Udp.Make(I)"; + append_main " module T = Tcp.Flow.Make(I)(%s)(%s)(%s)" + (Impl.module_name t.time) + (Impl.module_name t.clock) + (Impl.module_name t.random); + append_main " module S = Tcpip_stack_direct.Make(%s)(%s)(%s)(%s)(E)(I)(U)(T)" + (Impl.module_name t.console) + (Impl.module_name t.time) + (Impl.module_name t.random) + (Impl.module_name t.network); + append_main " include S"; + append_main "end"; + newline_main (); + append_main "let %s () =" name; + append_main " %s () >>= function" (Impl.name t.console); + append_main " | `Error _ -> %s" + (driver_initialisation_error (Impl.name t.console)); + append_main " | `Ok console ->"; + append_main " %s () >>= function" (Impl.name t.network); + append_main " | `Error e ->"; + let net_init_error_msg_fn = "Mirage_runtime.string_of_network_init_error" in + append_main " fail (Failure (%s %S e))" + net_init_error_msg_fn (Impl.name t.network); + append_main " | `Ok interface ->"; + append_main " let config = {"; + append_main " V1_LWT.name = %S;" name; + append_main " console; interface;"; + begin match t.config with + | `DHCP -> append_main " mode = `DHCP;" + | `IPV4 i -> append_main " mode = `IPv4 %s;" (meta_ipv4_config i); + end; + append_main " } in"; + append_main " %s.connect config" (module_name t); + newline_main () + + let clean t = + Impl.clean t.clock; + Impl.clean t.time; + Impl.clean t.console; + Impl.clean t.network; + Impl.clean t.random + + let update_path t root = + { t with + clock = Impl.update_path t.clock root; + time = Impl.update_path t.time root; + console = Impl.update_path t.console root; + network = Impl.update_path t.network root; + random = Impl.update_path t.random root; + } + +end + +module STACKV4_socket = struct + + type t = { + console: console impl; + ipv4s : Ipaddr.V4.t list; + } + + let meta_ips ips = + String.concat "; " + (List.map (fun x -> + Printf.sprintf "Ipaddr.V4.of_string_exn %S" (Ipaddr.V4.to_string x) + ) ips) + + let name t = + let key = "stackv4" ^ Impl.name t.console ^ meta_ips t.ipv4s in + Name.of_key key ~base:"stackv4" + + let module_name t = + String.capitalize (name t) + + let packages t = + "tcpip" :: Impl.packages t.console + + let libraries t = + "tcpip.stack-socket" :: Impl.libraries t.console + + let configure t = + let name = name t in + Impl.configure t.console; + append_main "module %s = Tcpip_stack_socket.Make(%s)" + (module_name t) (Impl.module_name t.console); + newline_main (); + append_main "let %s () =" name; + append_main " %s () >>= function" (Impl.name t.console); + append_main " | `Error _ -> %s" + (driver_initialisation_error (Impl.name t.console)); + append_main " | `Ok console ->"; + append_main " let config = {"; + append_main " V1_LWT.name = %S;" name; + append_main " console; interface = [%s];" (meta_ips t.ipv4s); + append_main " mode = ();"; + append_main " } in"; + append_main " %s.connect config" (module_name t); + newline_main () + + let clean t = + Impl.clean t.console + + let update_path t root = + { t with console = Impl.update_path t.console root } + +end + +type stackv4 = STACKV4 + +let stackv4 = Type STACKV4 + +let direct_stackv4_with_dhcp + ?(clock=default_clock) + ?(random=default_random) + ?(time=default_time) + console network = + let t = { + STACKV4_direct.console; network; time; clock; random; + config = `DHCP } in + impl stackv4 t (module STACKV4_direct) + +let direct_stackv4_with_default_ipv4 + ?(clock=default_clock) + ?(random=default_random) + ?(time=default_time) + console network = + let t = { + STACKV4_direct.console; network; clock; time; random; + config = `IPV4 default_ipv4_conf; + } in + impl stackv4 t (module STACKV4_direct) + +let direct_stackv4_with_static_ipv4 + ?(clock=default_clock) + ?(random=default_random) + ?(time=default_time) + console network ipv4 = + let t = { + STACKV4_direct.console; network; clock; time; random; + config = `IPV4 ipv4; + } in + impl stackv4 t (module STACKV4_direct) + +let socket_stackv4 console ipv4s = + impl stackv4 { STACKV4_socket.console; ipv4s } (module STACKV4_socket) + +module Channel_over_TCP (V : sig type t end) = struct + + type t = V.t tcp impl + + let name t = + let key = "channel" ^ Impl.name t in + Name.of_key key ~base:"channel" + + let module_name t = + String.capitalize (name t) + + let packages _ = + [ "mirage-tcpip" ] + + let libraries _ = + [ "tcpip.channel" ] + + let configure t = + Impl.configure t; + append_main "module %s = Channel.Make(%s)" (module_name t) (Impl.module_name t); + newline_main (); + append_main "let %s () =" (name t); + append_main " %s () >>= function" (Impl.name t); + append_main " | `Error _ -> %s" (driver_initialisation_error (Impl.name t)); + append_main " | `Ok console ->"; + append_main " let flow = %s.create config in" (module_name t); + append_main " return (`Ok flow)"; + newline_main () + + let clean t = + Impl.clean t + + let update_path t root = + Impl.update_path t root + +end + +type channel = CHANNEL + +let channel = Type CHANNEL + +let channel_over_tcp (type v) (flow : v tcp impl) = + impl channel flow (module Channel_over_TCP (struct type t = v end)) + +module VCHAN_localhost = struct + + type uuid = string + type t = uuid + + let name t = + let key = "in_memory" in + Name.of_key key ~base:"vchan" + + let module_name t = + String.capitalize (name t) + + let packages t = + [ "mirage-conduit" ] + + let libraries t = + [ "conduit.mirage" ] + + let configure t = + append_main "module %s = Conduit_localhost" (module_name t); + newline_main (); + append_main "let %s = %s.register %S" (name t) (module_name t) t; + newline_main () + + let clean t = () + + let update_path t root = t + +end + +module VCHAN_xenstore = struct + + type uuid = string + type t = string + + let name t = + let key = "xen" in + Name.of_key key ~base:"vchan" + + let module_name t = + String.capitalize (name t) + + let packages t = + match !mode with + |`Xen -> [ "vchan"; "mirage-xen"; "xen-evtchn"; "xen-gnt" ] + |`Unix | `MacOSX -> [ "vchan"; "xen-evtchn"; "xen-gnt"] + (* TODO: emit a failure on MacOSX? *) + + let libraries t = + match !mode with + |`Xen -> [ "conduit.mirage-xen" ] + |`Unix | `MacOSX-> [ "vchan" ] + + let configure t = + let m = + match !mode with + |`Xen -> "Conduit_xenstore" + |`Unix | `MacOSX -> "Vchan_lwt_unix.M" + in + append_main "module %s = %s" (module_name t) m; + newline_main (); + append_main "let %s = %s.register %S" (name t) (module_name t) t; + newline_main () + + let clean t = () + + let update_path t root = t + +end + +type vchan = STACK4 + +let vchan = Type STACK4 + +let vchan_localhost ?(uuid="localhost") () = + impl vchan uuid (module VCHAN_localhost) + +let vchan_xen ?(uuid="localhost") () = + impl vchan uuid (module VCHAN_xenstore) + +let vchan_default ?uuid () = + match !mode with + | `Xen -> vchan_xen ?uuid () + | `Unix | `MacOSX -> vchan_localhost ?uuid () + +module Conduit = struct + type t = + [ `Stack of stackv4 impl * vchan impl ] + + let name t = + let key = "conduit" ^ match t with + | `Stack (s,v) -> + Printf.sprintf "%s_%s" (Impl.name s) (Impl.name v) in + Name.of_key key ~base:"conduit" + + let module_name_core t = + String.capitalize (name t) + + let module_name t = + module_name_core t + + let packages t = + [ "conduit"; "mirage-types"; "vchan" ] @ + match t with + | `Stack (s,v) -> Impl.packages s @ (Impl.packages v) + + let libraries t = + [ "conduit.mirage" ] @ + match t with + | `Stack (s,v) -> Impl.libraries s @ (Impl.libraries v) + + let configure t = + begin match t with + | `Stack (s,v) -> + Impl.configure s; + Impl.configure v; + append_main "module %s = Conduit_mirage.Make(%s)(%s)" + (module_name_core t) (Impl.module_name s) (Impl.module_name v); + end; + newline_main (); + append_main "let %s () =" (name t); + let (stack_subname, vchan_subname) = match t with + | `Stack (s,v) -> Impl.name s, Impl.name v in + + append_main " %s () >>= function" stack_subname; + append_main " | `Error _ -> %s" (driver_initialisation_error stack_subname); + append_main " | `Ok %s ->" stack_subname; + append_main " %s >>= fun %s ->" vchan_subname vchan_subname; + append_main " %s.init ~peer:%s ~stack:%s () >>= fun %s ->" + (module_name_core t) vchan_subname stack_subname (name t); + append_main " return (`Ok %s)" (name t); + newline_main () + + let clean = function + | `Stack (s,v) -> Impl.clean s; Impl.clean v + + let update_path t root = + match t with + | `Stack (s,v) -> + `Stack ((Impl.update_path s root), (Impl.update_path v root)) + +end + +type conduit = Conduit + +let conduit = Type Conduit + +let conduit_direct ?(vchan=vchan_localhost ()) stack = + impl conduit (`Stack (stack,vchan)) (module Conduit) + +type conduit_client = [ + | `TCP of Ipaddr.t * int + | `Vchan of string list +] + +type conduit_server = [ + | `TCP of [ `Port of int ] + | `Vchan of string list +] + +module Resolver_unix = struct + type t = unit + + let name t = + let key = "resolver_unix" in + Name.of_key key ~base:"resolver" + + let module_name_core t = + String.capitalize (name t) + + let module_name t = + module_name_core t + + let packages t = + match !mode with + |`Unix | `MacOSX -> [ "mirage-conduit" ] + |`Xen -> failwith "Resolver_unix not supported on Xen" + + let libraries t = + [ "conduit.mirage"; "conduit.lwt-unix" ] + + let configure t = + append_main "module %s = Resolver_lwt" (module_name t); + append_main "let %s () =" (name t); + append_main " return (`Ok Resolver_lwt_unix.system)"; + newline_main () + + let clean t = () + + let update_path t root = t + +end + +module Resolver_direct = struct + type t = + [ `DNS of stackv4 impl * Ipaddr.V4.t option * int option ] + + let name t = + let key = "resolver" ^ match t with + | `DNS (s,_,_) -> Impl.name s in + Name.of_key key ~base:"resolver" + + let module_name_core t = + String.capitalize (name t) + + let module_name t = + (module_name_core t) ^ "_res" + + let packages t = + [ "dns"; "tcpip" ] @ + match t with + | `DNS (s,_,_) -> Impl.packages s + + let libraries t = + [ "dns.mirage" ] @ + match t with + | `DNS (s,_,_) -> Impl.libraries s + + let configure t = + begin match t with + | `DNS (s,_,_) -> + Impl.configure s; + append_main "module %s = Resolver_lwt" (module_name t); + append_main "module %s_dns = Dns_resolver_mirage.Make(OS.Time)(%s)" + (module_name_core t) (Impl.module_name s); + append_main "module %s = Resolver_mirage.Make(%s_dns)" + (module_name_core t) (module_name_core t); + end; + newline_main (); + append_main "let %s () =" (name t); + let subname = match t with + | `DNS (s,_,_) -> Impl.name s in + append_main " %s () >>= function" subname; + append_main " | `Error _ -> %s" (driver_initialisation_error subname); + append_main " | `Ok %s ->" subname; + let res_ns = match t with + | `DNS (_,None,_) -> "None" + | `DNS (_,Some ns,_) -> + Printf.sprintf "Ipaddr.V4.of_string %S" (Ipaddr.V4.to_string ns) in + append_main " let ns = %s in" res_ns; + let res_ns_port = match t with + | `DNS (_,_,None) -> "None" + | `DNS (_,_,Some ns_port) -> Printf.sprintf "Some %d" ns_port in + append_main " let ns_port = %s in" res_ns_port; + append_main " let res = %s.init ?ns ?ns_port ~stack:%s () in" (module_name_core t) subname; + append_main " return (`Ok res)"; + newline_main () + + let clean = function + | `DNS (s,_,_) -> Impl.clean s + + let update_path t root = + match t with + | `DNS (s,a,b) -> `DNS (Impl.update_path s root, a, b) + +end + +type resolver = Resolver + +let resolver = Type Resolver + +let resolver_dns ?ns ?ns_port stack = + impl resolver (`DNS (stack, ns, ns_port)) (module Resolver_direct) + +let resolver_unix_system = + impl resolver () (module Resolver_unix) + +module HTTP = struct + + type t = + [ `Channel of channel impl + | `Stack of conduit_server * conduit impl ] + + let name t = + let key = "http" ^ match t with + | `Channel c -> Impl.name c + | `Stack (_, c) -> Impl.name c in + Name.of_key key ~base:"http" + + let module_name_core t = + String.capitalize (name t) + + let module_name t = + module_name_core t ^ ".Server" + + let packages t = + [ "mirage-http" ] @ + match t with + | `Channel c -> Impl.packages c + | `Stack (_, c) -> Impl.packages c + + let libraries t = + [ "mirage-http" ] @ + match t with + | `Channel c -> Impl.libraries c + | `Stack (_, c) -> Impl.libraries c + + let configure t = + begin match t with + | `Channel c -> + Impl.configure c; + append_main "module %s = HTTP.Make(%s)" (module_name_core t) (Impl.module_name c) + | `Stack (_, c) -> + Impl.configure c; + append_main "module %s = HTTP.Make(%s)" (module_name_core t) (Impl.module_name c) + end; + newline_main (); + let subname = match t with + | `Channel c -> Impl.name c + | `Stack (_,c) -> Impl.name c in + append_main "let %s () =" (name t); + append_main " %s () >>= function" subname; + append_main " | `Error _ -> %s" (driver_initialisation_error subname); + append_main " | `Ok %s ->" subname; + begin match t with + | `Channel c -> failwith "TODO" + | `Stack (m,c) -> + append_main " let listen spec ="; + append_main " let ctx = %s in" (Impl.name c); + append_main " let mode = %s in" + (match m with + |`TCP (`Port port) -> Printf.sprintf "`TCP (`Port %d)" port + |`Vchan l -> failwith "Vchan not supported yet in server" + ); + append_main " %s.serve ~ctx ~mode (%s.Server.listen spec)" (Impl.module_name c) (module_name_core t); + append_main " in"; + append_main " return (`Ok listen)"; + end; + newline_main () + + let clean = function + | `Channel c -> Impl.clean c + | `Stack (_,c) -> Impl.clean c + + let update_path t root = + match t with + | `Channel c -> `Channel (Impl.update_path c root) + | `Stack (m, c) -> `Stack (m, Impl.update_path c root) + +end + +type http = HTTP + +let http = Type HTTP + +let http_server_of_channel chan = + impl http (`Channel chan) (module HTTP) + +let http_server mode conduit = + impl http (`Stack (mode, conduit)) (module HTTP) + +type job = JOB + +let job = Type JOB + +module Job = struct + + type t = { + name: string; + impl: job impl; + } + + let create impl = + let name = Name.create "job" in + { name; impl } + + let name t = + t.name + + let module_name t = + "Job_" ^ t.name + + let packages t = + Impl.packages t.impl + + let libraries t = + Impl.libraries t.impl + + let configure t = + Impl.configure t.impl; + newline_main () + + let clean t = + Impl.clean t.impl + + let update_path t root = + { t with impl = Impl.update_path t.impl root } + +end + +module Tracing = struct + type t = { + size : int; + } + + let unix_trace_file = "trace.ctf" + + let packages _ = StringSet.singleton "mirage-profile" + + let libraries _ = + match !mode with + | `Unix | `MacOSX -> StringSet.singleton "mirage-profile.unix" + | `Xen -> StringSet.singleton "mirage-profile.xen" + + let configure t = + if Sys.command "ocamlfind query lwt.tracing 2>/dev/null" <> 0 then ( + flush stdout; + error "lwt.tracing module not found. Hint:\n\ + opam pin add lwt 'https://github.com/mirage/lwt.git#tracing'" + ); + + append_main "let () = "; + begin match !mode with + | `Unix | `MacOSX -> + append_main " let buffer = MProf_unix.mmap_buffer ~size:%d %S in" t.size unix_trace_file; + append_main " let trace_config = MProf.Trace.Control.make buffer MProf_unix.timestamper in"; + append_main " MProf.Trace.Control.start trace_config"; + | `Xen -> + append_main " let trace_pages = MProf_xen.make_shared_buffer ~size:%d in" t.size; + append_main " let buffer = trace_pages |> Io_page.to_cstruct |> Cstruct.to_bigarray in"; + append_main " let trace_config = MProf.Trace.Control.make buffer MProf_xen.timestamper in"; + append_main " MProf.Trace.Control.start trace_config;"; + append_main " MProf_xen.share_with (module Gnt.Gntshr) (module OS.Xs) ~domid:0 trace_pages"; + append_main " |> OS.Main.run"; + end; + newline_main () +end + +type tracing = Tracing.t + +let mprof_trace ~size () = + { Tracing.size } + +type t = { + name: string; + root: string; + jobs: job impl list; + tracing: tracing option; +} + +let t = ref None + +let config_file = ref None + +let reset () = + config_file := None; + t := None + +let set_config_file f = + config_file := Some f + +let get_config_file () = + match !config_file with + | None -> Sys.getcwd () / "config.ml" + | Some f -> f + +let update_path t root = + { t with jobs = List.map (fun j -> Impl.update_path j root) t.jobs } + +let register ?tracing name jobs = + let root = match !config_file with + | None -> failwith "no config file" + | Some f -> Filename.dirname f in + t := Some { name; jobs; root; tracing } + +let registered () = + match !t with + | None -> { name = "empty"; jobs = []; root = Sys.getcwd (); tracing = None } + | Some t -> t + +let ps = ref StringSet.empty + +let add_to_opam_packages p = + ps := StringSet.union (StringSet.of_list p) !ps + +let packages t = + let m = match !mode with + | `Unix | `MacOSX -> "mirage-unix" + | `Xen -> "mirage-xen" in + let ps = StringSet.add m !ps in + let ps = match t.tracing with + | None -> ps + | Some tracing -> StringSet.union (Tracing.packages tracing) ps in + let ps = List.fold_left (fun set j -> + let ps = StringSet.of_list (Impl.packages j) in + StringSet.union ps set + ) ps t.jobs in + StringSet.elements ps + +let ls = ref StringSet.empty + +let add_to_ocamlfind_libraries l = + ls := StringSet.union !ls (StringSet.of_list l) + +let libraries t = + let m = match !mode with + | `Unix | `MacOSX -> "mirage-types.lwt" + | `Xen -> "mirage-types.lwt" in + let ls = StringSet.add m !ls in + let ls = match t.tracing with + | None -> ls + | Some tracing -> StringSet.union (Tracing.libraries tracing) ls in + let ls = List.fold_left (fun set j -> + let ls = StringSet.of_list (Impl.libraries j) in + StringSet.union ls set + ) ls t.jobs in + StringSet.elements ls + +let configure_myocamlbuild_ml t = + let minor, major = ocaml_version () in + if minor < 4 || major < 1 then ( + (* Previous ocamlbuild versions weren't able to understand the + --output-obj rules *) + let file = t.root / "myocamlbuild.ml" in + let oc = open_out file in + append oc "(* %s *)" generated_by_mirage; + newline oc; + append oc + "open Ocamlbuild_pack;;\n\ + open Ocamlbuild_plugin;;\n\ + open Ocaml_compiler;;\n\ + \n\ + let native_link_gen linker =\n\ + \ link_gen \"cmx\" \"cmxa\" !Options.ext_lib [!Options.ext_obj; \"cmi\"] linker;;\n\ + \n\ + let native_output_obj x = native_link_gen ocamlopt_link_prog\n\ + \ (fun tags -> tags++\"ocaml\"++\"link\"++\"native\"++\"output_obj\") x;;\n\ + \n\ + rule \"ocaml: cmx* & o* -> native.o\"\n\ + \ ~tags:[\"ocaml\"; \"native\"; \"output_obj\" ]\n\ + \ ~prod:\"%%.native.o\" ~deps:[\"%%.cmx\"; \"%%.o\"]\n\ + \ (native_output_obj \"%%.cmx\" \"%%.native.o\");;\n\ + \n\ + \n\ + let byte_link_gen = link_gen \"cmo\" \"cma\" \"cma\" [\"cmo\"; \"cmi\"];;\n\ + let byte_output_obj = byte_link_gen ocamlc_link_prog\n\ + \ (fun tags -> tags++\"ocaml\"++\"link\"++\"byte\"++\"output_obj\");;\n\ + \n\ + rule \"ocaml: cmo* -> byte.o\"\n\ + \ ~tags:[\"ocaml\"; \"byte\"; \"link\"; \"output_obj\" ]\n\ + ~prod:\"%%.byte.o\" ~dep:\"%%.cmo\"\n\ + \ (byte_output_obj \"%%.cmo\" \"%%.byte.o\");;"; + close_out oc + ) + +let clean_myocamlbuild_ml t = + remove (t.root / "myocamlbuild.ml") + +let configure_main_libvirt_xml t = + let file = t.root / t.name ^ "_libvirt.xml" in + let oc = open_out file in + append oc "" generated_by_mirage; + append oc ""; + append oc " %s" t.name; + append oc " 262144"; + append oc " 262144"; + append oc " 1"; + append oc " "; + append oc " linux"; + append oc " %s/mir-%s.xen" t.root t.name; + append oc " "; (* the libxl driver currently needs an empty cmdline to be able to start the domain on arm - due to this? http://lists.xen.org/archives/html/xen-devel/2014-02/msg02375.html *) + append oc " "; + append oc " "; + append oc " preserve"; + append oc " "; + append oc " "; + append oc " "; + append oc " "; + append oc " "; + append oc " "; + append oc " "; + append oc " "; + append oc ""; + close_out oc + +let clean_main_libvirt_xml t = + remove (t.root / t.name ^ "_libvirt.xml") + +let configure_main_xl t = + let file = t.root / t.name ^ ".xl" in + let oc = open_out file in + append oc "# %s" generated_by_mirage; + newline oc; + append oc "name = '%s'" t.name; + append oc "kernel = '%s/mir-%s.xen'" t.root t.name; + append oc "builder = 'linux'"; + append oc "memory = 256"; + append oc "on_crash = 'preserve'"; + newline oc; + append oc "# You must define the network and block interfaces manually."; + newline oc; + append oc "# The disk configuration is defined here:"; + append oc "# http://xenbits.xen.org/docs/4.3-testing/misc/xl-disk-configuration.txt"; + append oc "# An example would look like:"; + append oc "# disk = [ '/dev/loop0,,xvda' ]"; + newline oc; + append oc "# The network configuration is defined here:"; + append oc "# http://xenbits.xen.org/docs/4.3-testing/misc/xl-network-configuration.html"; + append oc "# An example would look like:"; + append oc "# vif = [ 'mac=c0:ff:ee:c0:ff:ee,bridge=br0' ]"; + close_out oc + +let clean_main_xl t = + remove (t.root / t.name ^ ".xl") + +let configure_main_xe t = + let file = t.root / t.name ^ ".xe" in + let oc = open_out file in + append oc "#!/bin/sh"; + append oc "# %s" generated_by_mirage; + newline oc; + append oc "set -e"; + newline oc; + append oc "# Dependency: xe"; + append oc "command -v xe >/dev/null 2>&1 || { echo >&2 \"I require xe but it's not installed. Aborting.\"; exit 1; }"; + append oc "# Dependency: xe-unikernel-upload"; + append oc "command -v xe-unikernel-upload >/dev/null 2>&1 || { echo >&2 \"I require xe-unikernel-upload but it's not installed. Aborting.\"; exit 1; }"; + append oc "# Dependency: a $HOME/.xe"; + append oc "if [ ! -e $HOME/.xe ]; then"; + append oc " echo Please create a config file for xe in $HOME/.xe which contains:"; + append oc " echo server=''"; + append oc " echo username=root"; + append oc " echo password=password"; + append oc " exit 1"; + append oc "fi"; + newline oc; + append oc "echo Uploading VDI containing unikernel"; + append oc "VDI=$(xe-unikernel-upload --path %s/mir-%s.xen)" t.root t.name; + append oc "echo VDI=$VDI"; + append oc "echo Creating VM metadata"; + append oc "VM=$(xe vm-create name-label=%s)" t.name; + append oc "echo VM=$VM"; + append oc "xe vm-param-set uuid=$VM PV-bootloader=pygrub"; + append oc "echo Adding network interface connected to xenbr0"; + append oc "ETH0=$(xe network-list bridge=xenbr0 params=uuid --minimal)"; + append oc "VIF=$(xe vif-create vm-uuid=$VM network-uuid=$ETH0 device=0)"; + append oc "echo Atting block device and making it bootable"; + append oc "VBD=$(xe vbd-create vm-uuid=$VM vdi-uuid=$VDI device=0)"; + append oc "xe vbd-param-set uuid=$VBD bootable=true"; + append oc "xe vbd-param-set uuid=$VBD other-config:owner=true"; + append oc "echo Starting VM"; + append oc "xe vm-start vm=%s" t.name; + close_out oc; + Unix.chmod file 0o755 + +let clean_main_xe t = + remove (t.root / t.name ^ ".xe") + +(* Get the linker flags for any extra C objects we depend on. + * This is needed when building a Xen image as we do the link manually. *) +let get_extra_ld_flags ~filter pkgs = + let output = read_command + "ocamlfind query -r -format '%%d\t%%(xen_linkopts)' -predicates native %s" + (String.concat " " pkgs) in + split output '\n' + |> List.fold_left (fun acc line -> + match cut_at line '\t' with + | None -> acc + | Some (dir, ldflags) -> Printf.sprintf "-L%s %s" dir ldflags :: acc + ) [] + +let configure_makefile t = + let file = t.root / "Makefile" in + let pkgs = "lwt.syntax" :: libraries t in + let libraries_str = + match pkgs with + | [] -> "" + | ls -> "-pkgs " ^ String.concat "," ls in + let packages = String.concat " " (packages t) in + let oc = open_out file in + append oc "# %s" generated_by_mirage; + newline oc; + append oc "LIBS = %s" libraries_str; + append oc "PKGS = %s" packages; + begin match !mode with + | `Xen -> + append oc "SYNTAX = -tags \"syntax(camlp4o),annot,bin_annot,strict_sequence,principal\"\n"; + append oc "SYNTAX += -tag-line \": -syntax(camlp4o)\"\n"; + append oc "FLAGS = -cflag -g -lflags -g,-linkpkg,-dontlink,unix\n"; + append oc "XENLIB = $(shell ocamlfind query mirage-xen)\n" + | `Unix -> + append oc "SYNTAX = -tags \"syntax(camlp4o),annot,bin_annot,strict_sequence,principal\"\n"; + append oc "SYNTAX += -tag-line \": -syntax(camlp4o)\"\n"; + append oc "FLAGS = -cflag -g -lflags -g,-linkpkg\n" + | `MacOSX -> + append oc "SYNTAX = -tags \"syntax(camlp4o),annot,bin_annot,strict_sequence,principal,thread\"\n"; + append oc "SYNTAX += -tag-line \": -syntax(camlp4o)\"\n"; + append oc "FLAGS = -cflag -g -lflags -g,-linkpkg\n" + end; + append oc "BUILD = ocamlbuild -use-ocamlfind $(LIBS) $(SYNTAX) $(FLAGS)\n\ + OPAM = opam\n\n\ + export PKG_CONFIG_PATH=$(shell opam config var prefix)/lib/pkgconfig\n\n\ + export OPAMVERBOSE=1\n\ + export OPAMYES=1"; + newline oc; + append oc ".PHONY: all depend clean build main.native\n\ + all: build\n\ + \n\ + depend:\n\ + \t$(OPAM) install $(PKGS) --verbose\n\ + \n\ + main.native:\n\ + \t$(BUILD) main.native\n\ + \n\ + main.native.o:\n\ + \t$(BUILD) main.native.o"; + newline oc; + + (* On ARM, we must convert the ELF image to an ARM boot executable zImage, + * while on x86 we leave it as it is. *) + let generate_image = + let need_zImage = + match uname_m () with + | Some machine -> String.length machine > 2 && String.sub machine 0 3 = "arm" + | None -> failwith "uname -m failed; can't determine target machine type!" in + if need_zImage then ( + Printf.sprintf "\t -o mir-%s.elf\n\ + \tobjcopy -O binary mir-%s.elf mir-%s.xen" + t.name t.name t.name + ) else ( + Printf.sprintf "\t -o mir-%s.xen" t.name + ) in + + begin match !mode with + | `Xen -> + let filter = function + | "unix" | "bigarray" |"shared_memory_ring_stubs" -> false (* Provided by mirage-xen instead. *) + | _ -> true in + let extra_c_archives = + get_extra_ld_flags ~filter pkgs + |> String.concat " \\\n\t " in + + append oc "build: main.native.o"; + let pkg_config_deps = "mirage-xen" in + append oc "\tpkg-config --print-errors --exists %s" pkg_config_deps; + append oc "\tld -d -static -nostdlib \\\n\ + \t _build/main.native.o \\\n\ + \t %s \\\n\ + \t $$(pkg-config --static --libs %s) \\\n\ + \t $(shell gcc -print-libgcc-file-name) \\\n\ + %s" + extra_c_archives pkg_config_deps generate_image; + | `Unix | `MacOSX -> + append oc "build: main.native"; + append oc "\tln -nfs _build/main.native mir-%s" t.name; + end; + newline oc; + append oc "run: build"; + begin match !mode with + | `Xen -> + append oc "\t@echo %s.xl has been created. Edit it to add VIFs or VBDs" t.name; + append oc "\t@echo Then do something similar to: xl create -c %s.xl\n" t.name + | `Unix | `MacOSX -> + append oc "\t$(SUDO) ./mir-%s\n" t.name + end; + append oc "clean:\n\ + \tocamlbuild -clean"; + close_out oc + +let clean_makefile t = + remove (t.root / "Makefile") + +let no_opam_version_check_ = ref false +let no_opam_version_check b = no_opam_version_check_ := b + +let configure_opam t = + info "Installing OPAM packages."; + match packages t with + | [] -> () + | ps -> + if command_exists "opam" then + if !no_opam_version_check_ then () + else ( + let opam_version = read_command "opam --version" in + let version_error () = + error "Your version of opam: %s is not up-to-date. \ + Please update to (at least) 1.2." opam_version + in + match split opam_version '.' with + | major::minor::_ -> + let major = try int_of_string major with Failure _ -> 0 in + let minor = try int_of_string minor with Failure _ -> 0 in + if (major, minor) >= (1, 2) then opam "install" ps else version_error () + | _ -> version_error () + ) + else error "OPAM is not installed." + +let clean_opam t = + () +(* This is a bit too agressive, disabling for now on. + let (++) = StringSet.union in + let set mode = StringSet.of_list (packages t mode) in + let packages = + set (`Unix `Socket) ++ set (`Unix `Direct) ++ set `Xen in + match StringSet.elements packages with + | [] -> () + | ps -> + if cmd_exists "opam" then opam "remove" ps + else error "OPAM is not installed." +*) + +let manage_opam_packages_ = ref true +let manage_opam_packages b = manage_opam_packages_ := b + +let configure_job j = + let name = Impl.name j in + let module_name = Impl.module_name j in + let param_names = Impl.names j in + append_main "let %s () =" name; + List.iter (fun p -> + append_main " %s () >>= function" p; + append_main " | `Error e -> %s" (driver_initialisation_error p); + append_main " | `Ok %s ->" p; + ) (dedup param_names); + append_main " %s.start %s" module_name (String.concat " " param_names); + newline_main () + +let configure_main t = + info "%s main.ml" (blue_s "Generating:"); + set_main_ml (t.root / "main.ml"); + append_main "(* %s *)" generated_by_mirage; + newline_main (); + append_main "open Lwt"; + newline_main (); + append_main "let _ = Printexc.record_backtrace true"; + newline_main (); + begin match t.tracing with + | None -> () + | Some tracing -> Tracing.configure tracing end; + List.iter (fun j -> Impl.configure j) t.jobs; + List.iter configure_job t.jobs; + let names = List.map (fun j -> Printf.sprintf "%s ()" (Impl.name j)) t.jobs in + append_main "let () ="; + append_main " OS.Main.run (join [%s])" (String.concat "; " names) + +let clean_main t = + List.iter Impl.clean t.jobs; + remove (t.root / "main.ml") + +let configure t = + info "%s %s" (blue_s "Using configuration:") (get_config_file ()); + info "%d job%s [%s]" + (List.length t.jobs) + (if List.length t.jobs = 1 then "" else "s") + (String.concat ", " (List.map Impl.functor_name t.jobs)); + in_dir t.root (fun () -> + if !manage_opam_packages_ then configure_opam t; + configure_myocamlbuild_ml t; + configure_makefile t; + configure_main_xl t; + configure_main_xe t; + configure_main_libvirt_xml t; + configure_main t + ) + +let make () = + match uname_s () with + | Some ("FreeBSD" | "OpenBSD" | "NetBSD" | "DragonFly") -> "gmake" + | _ -> "make" + +let build t = + info "Build: %s" (blue_s (get_config_file ())); + in_dir t.root (fun () -> + command "%s build" (make ()) + ) + +let run t = + info "Run: %s" (blue_s (get_config_file ())); + in_dir t.root (fun () -> + command "%s run" (make ()) + ) + +let clean t = + info "Clean: %s" (blue_s (get_config_file ())); + in_dir t.root (fun () -> + if !manage_opam_packages_ then clean_opam t; + clean_myocamlbuild_ml t; + clean_makefile t; + clean_main_xl t; + clean_main_xe t; + clean_main_libvirt_xml t; + clean_main t; + command "rm -rf %s/_build" t.root; + command "rm -rf log %s/main.native.o %s/main.native %s/mir-%s %s/*~" + t.root t.root t.root t.name t.root; + ) + +(* Compile the configuration file and attempt to dynlink it. + * It is responsible for registering an application via + * [Mirage_config.register] in order to have an observable + * side effect to this command. *) +let compile_and_dynlink file = + info "%s %s" (blue_s "Processing:") file; + let root = Filename.dirname file in + let file = Filename.basename file in + let file = Dynlink.adapt_filename file in + command "rm -rf %s/_build/%s.*" root (Filename.chop_extension file); + command "cd %s && ocamlbuild -use-ocamlfind -tags annot,bin_annot -pkg mirage %s" root file; + try Dynlink.loadfile (String.concat "/" [root; "_build"; file]) + with Dynlink.Error err -> error "Error loading config: %s" (Dynlink.error_message err) + +(* If a configuration file is specified, then use that. + * If not, then scan the curdir for a `config.ml` file. + * If there is more than one, then error out. *) +let scan_conf = function + | Some f -> + info "%s %s" (blue_s "Using specified config file:") f; + if not (Sys.file_exists f) then error "%s does not exist, stopping." f; + realpath f + | None -> + let files = Array.to_list (Sys.readdir ".") in + match List.filter ((=) "config.ml") files with + | [] -> error "No configuration file config.ml found.\n\ + You'll need to create one to let Mirage know what to do." + | [f] -> + info "%s %s" (blue_s "Using scanned config file:") f; + realpath f + | _ -> error "There is more than one config.ml in the current working directory.\n\ + Please specify one explictly on the command-line." + +let load file = + reset (); + let file = scan_conf file in + let root = realpath (Filename.dirname file) in + let file = root / Filename.basename file in + info "%s %s" (blue_s "Compiling for target:") (string_of_mode !mode); + set_config_file file; + compile_and_dynlink file; + let t = registered () in + set_section t.name; + update_path t root diff --git a/samples/OCaml/reload.ml b/samples/OCaml/reload.ml new file mode 100644 index 00000000..510f201f --- /dev/null +++ b/samples/OCaml/reload.ml @@ -0,0 +1,125 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 2000 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +open Cmm +open Arch +open Reg +open Mach + +(* Reloading for the AMD64 *) + +(* Summary of instruction set constraints: + "S" means either stack or register, "R" means register only. + Operation Res Arg1 Arg2 + Imove R S + or S R + Iconst_int S if 32-bit signed, R otherwise + Iconst_float R + Iconst_symbol (not PIC) S + Iconst_symbol (PIC) R + Icall_ind R + Itailcall_ind R + Iload R R R + Istore R R + Iintop(Icomp) R R S + or S S R + Iintop(Imul|Idiv|mod) R R S + Iintop(shift) S S R + Iintop(others) R R S + or S S R + Iintop_imm(Iadd, n)/lea R R + Iintop_imm(others) S S + Inegf...Idivf R R S + Ifloatofint R S + Iintoffloat R S + Ispecific(Ilea) R R R + Ispecific(Ifloatarithmem) R R R + + Conditional branches: + Iinttest S R + or R S + Ifloattest R S (or S R if swapped test) + other tests S +*) + +let stackp r = + match r.loc with + Stack _ -> true + | _ -> false + +class reload = object (self) + +inherit Reloadgen.reload_generic as super + +method! reload_operation op arg res = + match op with + | Iintop(Iadd|Isub|Iand|Ior|Ixor|Icomp _|Icheckbound) -> + (* One of the two arguments can reside in the stack, but not both *) + if stackp arg.(0) && stackp arg.(1) + then ([|arg.(0); self#makereg arg.(1)|], res) + else (arg, res) + | Iintop_imm(Iadd, _) when arg.(0).loc <> res.(0).loc -> + (* This add will be turned into a lea; args and results must be + in registers *) + super#reload_operation op arg res + | Iintop(Idiv | Imod | Ilsl | Ilsr | Iasr) + | Iintop_imm(_, _) -> + (* The argument(s) and results can be either in register or on stack *) + (* Note: Idiv, Imod: arg(0) and res(0) already forced in regs + Ilsl, Ilsr, Iasr: arg(1) already forced in regs *) + (arg, res) + | Iintop(Imul) | Iaddf | Isubf | Imulf | Idivf -> + (* First argument (= result) must be in register, second arg + can reside in the stack *) + if stackp arg.(0) + then (let r = self#makereg arg.(0) in ([|r; arg.(1)|], [|r|])) + else (arg, res) + | Ifloatofint | Iintoffloat -> + (* Result must be in register, but argument can be on stack *) + (arg, (if stackp res.(0) then [| self#makereg res.(0) |] else res)) + | Iconst_int n -> + if n <= 0x7FFFFFFFn && n >= -0x80000000n + then (arg, res) + else super#reload_operation op arg res + | Iconst_symbol _ -> + if !pic_code || !Clflags.dlcode + then super#reload_operation op arg res + else (arg, res) + | _ -> (* Other operations: all args and results in registers *) + super#reload_operation op arg res + +method! reload_test tst arg = + match tst with + Iinttest cmp -> + (* One of the two arguments can reside on stack *) + if stackp arg.(0) && stackp arg.(1) + then [| self#makereg arg.(0); arg.(1) |] + else arg + | Ifloattest((Clt|Cle), _) -> + (* Cf. emit.mlp: we swap arguments in this case *) + (* First argument can be on stack, second must be in register *) + if stackp arg.(1) + then [| arg.(0); self#makereg arg.(1) |] + else arg + | Ifloattest((Ceq|Cne|Cgt|Cge), _) -> + (* Second argument can be on stack, first must be in register *) + if stackp arg.(0) + then [| self#makereg arg.(0); arg.(1) |] + else arg + | _ -> + (* The argument(s) can be either in register or on stack *) + arg + +end + +let fundecl f = + (new reload)#fundecl f diff --git a/samples/OCaml/sigset.ml b/samples/OCaml/sigset.ml new file mode 100644 index 00000000..ba5e1092 --- /dev/null +++ b/samples/OCaml/sigset.ml @@ -0,0 +1,70 @@ +(* + * Copyright (c) 2013 Jeremy Yallop. + * + * This file is distributed under the terms of the MIT License. + * See the file LICENSE for details. + *) + +open PosixTypes +open Ctypes +open Foreign + +type t = sigset_t ptr + +let t = ptr sigset_t + +(* This function initializes the signal set set to exclude all of the defined + signals. It always returns 0. *) +let sigemptyset = foreign "sigemptyset" (ptr sigset_t @-> returning int) + +let empty () = + let setp = allocate_n ~count:1 sigset_t in begin + ignore (sigemptyset setp); + setp + end + +(* This function initializes the signal set set to include all of the defined + signals. Again, the return value is 0. *) +let sigfillset = foreign "sigfillset" (ptr sigset_t @-> returning int) + +let full () = + let setp = allocate_n ~count:1 sigset_t in begin + ignore (sigfillset setp); + setp + end + +(* This function adds the signal signum to the signal set set. All sigaddset + does is modify set; it does not block or unblock any signals. + + The return value is 0 on success and -1 on failure. The following errno + error condition is defined for this function: + + EINVAL The signum argument doesn't specify a valid signal. +*) +let sigaddset = foreign "sigaddset" ~check_errno:true + (ptr sigset_t @-> int @-> returning int) + +let add set signal = ignore (sigaddset set signal) + +(* This function removes the signal signum from the signal set set. All + sigdelset does is modify set; it does not block or unblock any signals. + + The return value and error conditions are the same as for + sigaddset. *) +let sigdelset = foreign "sigdelset" ~check_errno:true + (ptr sigset_t @-> int @-> returning int) + +let del set signal = ignore (sigdelset set signal) + +(* The sigismember function tests whether the signal signum is a member of the + signal set set. It returns 1 if the signal is in the set, 0 if not, and -1 if + there is an error. + + The following errno error condition is defined for this function: + + EINVAL The signum argument doesn't specify a valid signal. +*) +let sigismember = foreign "sigismember" ~check_errno:true + (ptr sigset_t @-> int @-> returning int) + +let mem set signal = sigismember set signal <> 0 diff --git a/samples/OCaml/uutf.ml b/samples/OCaml/uutf.ml new file mode 100644 index 00000000..ff12d954 --- /dev/null +++ b/samples/OCaml/uutf.ml @@ -0,0 +1,810 @@ +(*--------------------------------------------------------------------------- + Copyright 2012 Daniel C. Bünzli. All rights reserved. + Distributed under the BSD3 license, see license at the end of the file. + %%NAME%% release %%VERSION%% + ---------------------------------------------------------------------------*) + +let io_buffer_size = 65536 (* IO_BUFFER_SIZE 4.0.0 *) + +let pp = Format.fprintf +let invalid_encode () = invalid_arg "expected `Await encode" +let invalid_bounds j l = + invalid_arg (Printf.sprintf "invalid bounds (index %d, length %d)" j l) + +(* Unsafe string byte manipulations. If you don't believe the author's + invariants, replacing with safe versions makes everything safe in + the module. He won't be upset. *) + +let unsafe_chr = Char.unsafe_chr +let unsafe_blit = String.unsafe_blit +let unsafe_array_get = Array.unsafe_get +let unsafe_byte s j = Char.code (String.unsafe_get s j) +let unsafe_set_byte s j byte = String.unsafe_set s j (Char.unsafe_chr byte) + +(* Unicode characters *) + +type uchar = int +let u_bom = 0xFEFF (* BOM. *) +let u_rep = 0xFFFD (* replacement character. *) +let is_uchar cp = + (0x0000 <= cp && cp <= 0xD7FF) || (0xE000 <= cp && cp <= 0x10FFFF) + +let pp_cp ppf cp = + if cp < 0 || cp > 0x10FFFF then pp ppf "U+Invalid(%X)" cp else + if cp <= 0xFFFF then pp ppf "U+%04X" cp else + pp ppf "U+%X" cp + +let cp_to_string cp = (* NOT thread safe. *) + pp Format.str_formatter "%a" pp_cp cp; Format.flush_str_formatter () + +(* Unicode encoding schemes *) + +type encoding = [ `UTF_8 | `UTF_16 | `UTF_16BE | `UTF_16LE ] +type decoder_encoding = [ encoding | `US_ASCII | `ISO_8859_1 ] + +let encoding_of_string s = match String.uppercase s with (* IANA names. *) +| "UTF-8" -> Some `UTF_8 +| "UTF-16" -> Some `UTF_16 +| "UTF-16LE" -> Some `UTF_16LE +| "UTF-16BE" -> Some `UTF_16BE +| "ANSI_X3.4-1968" | "ISO-IR-6" | "ANSI_X3.4-1986" | "ISO_646.IRV:1991" +| "ASCII" | "ISO646-US" | "US-ASCII" | "US" | "IBM367" | "CP367" | "CSASCII" -> + Some `US_ASCII +| "ISO_8859-1:1987" | "ISO-IR-100" | "ISO_8859-1" | "ISO-8859-1" +| "LATIN1" | "L1" | "IBM819" | "CP819" | "CSISOLATIN1" -> + Some `ISO_8859_1 +| _ -> None + +let encoding_to_string = function +| `UTF_8 -> "UTF-8" | `UTF_16 -> "UTF-16" | `UTF_16BE -> "UTF-16BE" +| `UTF_16LE -> "UTF-16LE" | `US_ASCII -> "US-ASCII" +| `ISO_8859_1 -> "ISO-8859-1" + +(* Base character decoders. They assume enough data. *) + +let malformed s j l = `Malformed (String.sub s j l) +let malformed_pair be hi s j l = (* missing or half low surrogate at eoi. *) + let bs1 = String.sub s j l in + let bs0 = String.create 2 in + let j0, j1 = if be then (0, 1) else (1, 0) in + unsafe_set_byte bs0 j0 (hi lsr 8); + unsafe_set_byte bs0 j1 (hi land 0xFF); + `Malformed (bs0 ^ bs1) + +let r_us_ascii s j = + (* assert (0 <= j && j < String.length s); *) + let b0 = unsafe_byte s j in + if b0 <= 127 then `Uchar b0 else malformed s j 1 + +let r_iso_8859_1 s j = + (* assert (0 <= j && j < String.length s); *) + `Uchar (unsafe_byte s j) + +let utf_8_len = [| (* uchar byte length according to first UTF-8 byte. *) + 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; + 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; + 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; + 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; + 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; + 1; 1; 1; 1; 1; 1; 1; 1; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; + 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; + 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; + 0; 0; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; + 2; 2; 2; 2; 2; 2; 2; 2; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; + 4; 4; 4; 4; 4; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0 |] + +let r_utf_8 s j l = + (* assert (0 <= j && 0 <= l && j + l <= String.length s); *) + match l with + | 1 -> `Uchar (unsafe_byte s j) + | 2 -> + let b0 = unsafe_byte s j in let b1 = unsafe_byte s (j + 1) in + if b1 lsr 6 != 0b10 then malformed s j l else + `Uchar (((b0 land 0x1F) lsl 6) lor (b1 land 0x3F)) + | 3 -> + let b0 = unsafe_byte s j in let b1 = unsafe_byte s (j + 1) in + let b2 = unsafe_byte s (j + 2) in + let c = `Uchar (((b0 land 0x0F) lsl 12) lor + ((b1 land 0x3F) lsl 6) lor + (b2 land 0x3F)) + in + if b2 lsr 6 != 0b10 then malformed s j l else + begin match b0 with + | 0xE0 -> if b1 < 0xA0 || 0xBF < b1 then malformed s j l else c + | 0xED -> if b1 < 0x80 || 0x9F < b1 then malformed s j l else c + | _ -> if b1 lsr 6 != 0b10 then malformed s j l else c + end + | 4 -> + let b0 = unsafe_byte s j in let b1 = unsafe_byte s (j + 1) in + let b2 = unsafe_byte s (j + 2) in let b3 = unsafe_byte s (j + 3) in + let c = `Uchar (((b0 land 0x07) lsl 18) lor + ((b1 land 0x3F) lsl 12) lor + ((b2 land 0x3F) lsl 6) lor + (b3 land 0x3F)) + in + if b3 lsr 6 != 0b10 || b2 lsr 6 != 0b10 then malformed s j l else + begin match b0 with + | 0xF0 -> if b1 < 0x90 || 0xBF < b1 then malformed s j l else c + | 0xF4 -> if b1 < 0x80 || 0x8F < b1 then malformed s j l else c + | _ -> if b1 lsr 6 != 0b10 then malformed s j l else c + end + | _ -> assert false + +let r_utf_16 s j0 j1 = (* May return a high surrogate. *) + (* assert (0 <= j0 && 0 <= j1 && max j0 j1 < String.length s); *) + let b0 = unsafe_byte s j0 in let b1 = unsafe_byte s j1 in + let u = (b0 lsl 8) lor b1 in + if u < 0xD800 || u > 0xDFFF then `Uchar u else + if u > 0xDBFF then malformed s (min j0 j1) 2 else `Hi u + +let r_utf_16_lo hi s j0 j1 = (* Combines [hi] with a low surrogate. *) + (* assert (0 <= j0 && 0 <= j1 && max j0 j1 < String.length s); *) + let b0 = unsafe_byte s j0 in + let b1 = unsafe_byte s j1 in + let lo = (b0 lsl 8) lor b1 in + if lo < 0xDC00 || lo > 0xDFFF + then malformed_pair (j0 < j1 (* true => be *)) hi s (min j0 j1) 2 + else `Uchar ((((hi land 0x3FF) lsl 10) lor (lo land 0x3FF)) + 0x10000) + +let r_encoding s j l = (* guess encoding with max. 3 bytes. *) + (* assert (0 <= j && 0 <= l && j + l <= String.length s) *) + let some i = if i < l then Some (unsafe_byte s (j + i)) else None in + match (some 0), (some 1), (some 2) with + | Some 0xEF, Some 0xBB, Some 0xBF -> `UTF_8 `BOM + | Some 0xFE, Some 0xFF, _ -> `UTF_16BE `BOM + | Some 0xFF, Some 0xFE, _ -> `UTF_16LE `BOM + | Some 0x00, Some p, _ when p > 0 -> `UTF_16BE (`ASCII p) + | Some p, Some 0x00, _ when p > 0 -> `UTF_16LE (`ASCII p) + | Some u, _, _ when utf_8_len.(u) <> 0 -> `UTF_8 `Decode + | Some _, Some _, _ -> `UTF_16BE `Decode + | Some _, None , None -> `UTF_8 `Decode + | None , None , None -> `UTF_8 `End + | None , Some _, _ -> assert false + | Some _, None , Some _ -> assert false + | None , None , Some _ -> assert false + +(* Decode *) + +type src = [ `Channel of in_channel | `String of string | `Manual ] +type nln = [ `ASCII of uchar | `NLF of uchar | `Readline of uchar ] +type decode = [ `Await | `End | `Malformed of string | `Uchar of uchar] + +let pp_decode ppf = function +| `Uchar u -> pp ppf "@[`Uchar %a@]" pp_cp u +| `End -> pp ppf "`End" +| `Await -> pp ppf "`Await" +| `Malformed bs -> + let l = String.length bs in + pp ppf "@[`Malformed ("; + if l > 0 then pp ppf "%02X" (Char.code (bs.[0])); + for i = 1 to l - 1 do pp ppf " %02X" (Char.code (bs.[i])) done; + pp ppf ")@]" + +type decoder = + { src : src; (* input source. *) + mutable encoding : decoder_encoding; (* decoded encoding. *) + nln : nln option; (* newline normalization (if any). *) + nl : int; (* newline normalization character. *) + mutable i : string; (* current input chunk. *) + mutable i_pos : int; (* input current position. *) + mutable i_max : int; (* input maximal position. *) + t : string; (* four bytes temporary buffer for overlapping reads. *) + mutable t_len : int; (* current byte length of [t]. *) + mutable t_need : int; (* number of bytes needed in [t]. *) + mutable removed_bom : bool; (* [true] if an initial BOM was removed. *) + mutable last_cr : bool; (* [true] if last char was CR. *) + mutable line : int; (* line number. *) + mutable col : int; (* column number. *) + mutable byte_count : int; (* byte count. *) + mutable count : int; (* char count. *) + mutable pp : (* decoder post-processor for BOM, position and nln. *) + decoder -> [ `Malformed of string | `Uchar of uchar ] -> decode; + mutable k : decoder -> decode } (* decoder continuation. *) + +(* On decodes that overlap two (or more) [d.i] buffers, we use [t_fill] to copy + the input data to [d.t] and decode from there. If the [d.i] buffers are not + too small this is faster than continuation based byte per byte writes. + + End of input (eoi) is signalled by [d.i_pos = 0] and [d.i_max = min_int] + which implies that [i_rem d < 0] is [true]. *) + +let i_rem d = d.i_max - d.i_pos + 1 (* remaining bytes to read in [d.i]. *) +let eoi d = d.i <- ""; d.i_pos <- 0; d.i_max <- min_int (* set eoi in [d]. *) +let src d s j l = (* set [d.i] with [s]. *) + if (j < 0 || l < 0 || j + l > String.length s) then invalid_bounds j l else + if (l = 0) then eoi d else + (d.i <- s; d.i_pos <- j; d.i_max <- j + l - 1) + +let refill k d = match d.src with (* get new input in [d.i] and [k]ontinue. *) +| `Manual -> d.k <- k; `Await +| `String _ -> eoi d; k d +| `Channel ic -> + let rc = input ic d.i 0 (String.length d.i) in + (src d d.i 0 rc; k d) + +let t_need d need = d.t_len <- 0; d.t_need <- need +let rec t_fill k d = (* get [d.t_need] bytes (or less if eoi) in [i.t]. *) + let blit d l = + unsafe_blit d.i d.i_pos d.t d.t_len (* write pos. *) l; + d.i_pos <- d.i_pos + l; d.t_len <- d.t_len + l; + in + let rem = i_rem d in + if rem < 0 (* eoi *) then k d else + let need = d.t_need - d.t_len in + if rem < need then (blit d rem; refill (t_fill k) d) else (blit d need; k d) + +let ret k v byte_count d = (* return post-processed [v]. *) + d.k <- k; d.byte_count <- d.byte_count + byte_count; d.pp d v + +(* Decoders. *) + +let rec decode_us_ascii d = + let rem = i_rem d in + if rem <= 0 then (if rem < 0 then `End else refill decode_us_ascii d) else + let j = d.i_pos in + d.i_pos <- d.i_pos + 1; ret decode_us_ascii (r_us_ascii d.i j) 1 d + +let rec decode_iso_8859_1 d = + let rem = i_rem d in + if rem <= 0 then (if rem < 0 then `End else refill decode_iso_8859_1 d) else + let j = d.i_pos in + d.i_pos <- d.i_pos + 1; ret decode_iso_8859_1 (r_iso_8859_1 d.i j) 1 d + +(* UTF-8 decoder *) + +let rec t_decode_utf_8 d = (* decode from [d.t]. *) + if d.t_len < d.t_need + then ret decode_utf_8 (malformed d.t 0 d.t_len) d.t_len d + else ret decode_utf_8 (r_utf_8 d.t 0 d.t_len) d.t_len d + +and decode_utf_8 d = + let rem = i_rem d in + if rem <= 0 then (if rem < 0 then `End else refill decode_utf_8 d) else + let need = unsafe_array_get utf_8_len (unsafe_byte d.i d.i_pos) in + if rem < need then (t_need d need; t_fill t_decode_utf_8 d) else + let j = d.i_pos in + if need = 0 + then (d.i_pos <- d.i_pos + 1; ret decode_utf_8 (malformed d.i j 1) 1 d) + else (d.i_pos <- d.i_pos + need; ret decode_utf_8 (r_utf_8 d.i j need) need d) + +(* UTF-16BE decoder *) + +let rec t_decode_utf_16be_lo hi d = (* decode from [d.t]. *) + let bcount = d.t_len + 2 (* hi count *) in + if d.t_len < d.t_need + then ret decode_utf_16be (malformed_pair true hi d.t 0 d.t_len) bcount d + else ret decode_utf_16be (r_utf_16_lo hi d.t 0 1) bcount d + +and t_decode_utf_16be d = (* decode from [d.t]. *) + if d.t_len < d.t_need + then ret decode_utf_16be (malformed d.t 0 d.t_len) d.t_len d + else decode_utf_16be_lo (r_utf_16 d.t 0 1) d + +and decode_utf_16be_lo v d = match v with +| `Uchar _ | `Malformed _ as v -> ret decode_utf_16be v 2 d +| `Hi hi -> + let rem = i_rem d in + if rem < 2 then (t_need d 2; t_fill (t_decode_utf_16be_lo hi) d) else + let j = d.i_pos in + d.i_pos <- d.i_pos + 2; + ret decode_utf_16be (r_utf_16_lo hi d.i j (j + 1)) 4 d + +and decode_utf_16be d = + let rem = i_rem d in + if rem <= 0 then (if rem < 0 then `End else refill decode_utf_16be d) else + if rem < 2 then (t_need d 2; t_fill t_decode_utf_16be d) else + let j = d.i_pos in + d.i_pos <- d.i_pos + 2; decode_utf_16be_lo (r_utf_16 d.i j (j + 1)) d + +(* UTF-16LE decoder, same as UTF-16BE with byte swapped. *) + +let rec t_decode_utf_16le_lo hi d = (* decode from [d.t]. *) + let bcount = d.t_len + 2 (* hi count *) in + if d.t_len < d.t_need + then ret decode_utf_16le (malformed_pair false hi d.t 0 d.t_len) bcount d + else ret decode_utf_16le (r_utf_16_lo hi d.t 1 0) bcount d + +and t_decode_utf_16le d = (* decode from [d.t]. *) + if d.t_len < d.t_need + then ret decode_utf_16le (malformed d.t 0 d.t_len) d.t_len d + else decode_utf_16le_lo (r_utf_16 d.t 1 0) d + +and decode_utf_16le_lo v d = match v with +| `Uchar _ | `Malformed _ as v -> ret decode_utf_16le v 2 d +| `Hi hi -> + let rem = i_rem d in + if rem < 2 then (t_need d 2; t_fill (t_decode_utf_16le_lo hi) d) else + let j = d.i_pos in + d.i_pos <- d.i_pos + 2; + ret decode_utf_16le (r_utf_16_lo hi d.i (j + 1) j) 4 d + +and decode_utf_16le d = + let rem = i_rem d in + if rem <= 0 then (if rem < 0 then `End else refill decode_utf_16le d) else + if rem < 2 then (t_need d 2; t_fill t_decode_utf_16le d) else + let j = d.i_pos in + d.i_pos <- d.i_pos + 2; decode_utf_16le_lo (r_utf_16 d.i (j + 1) j) d + +(* Encoding guessing. The guess is simple but starting the decoder + after is tedious, uutf's decoders are not designed to put bytes + back in the stream. *) + +let guessed_utf_8 d = (* start decoder after `UTF_8 guess. *) + let b3 d = (* handles the third read byte. *) + let b3 = unsafe_byte d.t 2 in + match utf_8_len.(b3) with + | 0 -> ret decode_utf_8 (malformed d.t 2 1) 1 d + | n -> + d.t_need <- n; d.t_len <- 1; unsafe_set_byte d.t 0 b3; + t_fill t_decode_utf_8 d + in + let b2 d = (* handle second read byte. *) + let b2 = unsafe_byte d.t 1 in + let b3 = if d.t_len > 2 then b3 else decode_utf_8 (* decodes `End *) in + match utf_8_len.(b2) with + | 0 -> ret b3 (malformed d.t 1 1) 1 d + | 1 -> ret b3 (r_utf_8 d.t 1 1) 1 d + | n -> (* copy d.t.(1-2) to d.t.(0-1) and decode *) + d.t_need <- n; + unsafe_set_byte d.t 0 b2; + if (d.t_len < 3) then d.t_len <- 1 else + (d.t_len <- 2; unsafe_set_byte d.t 1 (unsafe_byte d.t 2); ); + t_fill t_decode_utf_8 d + in + let b1 = unsafe_byte d.t 0 in (* handle first read byte. *) + let b2 = if d.t_len > 1 then b2 else decode_utf_8 (* decodes `End *) in + match utf_8_len.(b1) with + | 0 -> ret b2 (malformed d.t 0 1) 1 d + | 1 -> ret b2 (r_utf_8 d.t 0 1) 1 d + | 2 -> + if d.t_len < 2 then ret decode_utf_8 (malformed d.t 0 1) 1 d else + if d.t_len < 3 then ret decode_utf_8 (r_utf_8 d.t 0 2) 2 d else + ret b3 (r_utf_8 d.t 0 2) 2 d + | 3 -> + if d.t_len < 3 + then ret decode_utf_8 (malformed d.t 0 d.t_len) d.t_len d + else ret decode_utf_8 (r_utf_8 d.t 0 3) 3 d + | 4 -> + if d.t_len < 3 + then ret decode_utf_8 (malformed d.t 0 d.t_len) d.t_len d + else (d.t_need <- 4; t_fill t_decode_utf_8 d) + | n -> assert false + +let guessed_utf_16 d be v = (* start decoder after `UTF_16{BE,LE} guess. *) + let decode_utf_16, t_decode_utf_16, t_decode_utf_16_lo, j0, j1 = + if be then decode_utf_16be, t_decode_utf_16be, t_decode_utf_16be_lo, 0, 1 + else decode_utf_16le, t_decode_utf_16le, t_decode_utf_16le_lo, 1, 0 + in + let b3 k d = + if d.t_len < 3 then decode_utf_16 d (* decodes `End *) else + begin (* copy d.t.(2) to d.t.(0) and decode. *) + d.t_need <- 2; d.t_len <- 1; + unsafe_set_byte d.t 0 (unsafe_byte d.t 2); + t_fill k d + end + in + match v with + | `BOM -> ret (b3 t_decode_utf_16) (`Uchar u_bom) 2 d + | `ASCII u -> ret (b3 t_decode_utf_16) (`Uchar u) 2 d + | `Decode -> + match r_utf_16 d.t j0 j1 with + | `Malformed _ | `Uchar _ as v -> ret (b3 t_decode_utf_16) v 2 d + | `Hi hi -> + if d.t_len < 3 + then ret decode_utf_16 (malformed_pair be hi "" 0 0) d.t_len d + else (b3 (t_decode_utf_16_lo hi)) d + +let guess_encoding d = (* guess encoding and start decoder. *) + let setup d = match r_encoding d.t 0 d.t_len with + | `UTF_8 r -> + d.encoding <- `UTF_8; d.k <- decode_utf_8; + begin match r with + | `BOM -> ret decode_utf_8 (`Uchar u_bom) 3 d + | `Decode -> guessed_utf_8 d + | `End -> `End + end + | `UTF_16BE r -> + d.encoding <- `UTF_16BE; d.k <- decode_utf_16be; guessed_utf_16 d true r + | `UTF_16LE r -> + d.encoding <- `UTF_16LE; d.k <- decode_utf_16le; guessed_utf_16 d false r + + in + (t_need d 3; t_fill setup d) + +(* Character post-processors. Used for BOM handling, newline + normalization and position tracking. The [pp_remove_bom] is only + used for the first character to remove a possible initial BOM and + handle UTF-16 endianness recognition. *) + +let nline d = d.col <- 0; d.line <- d.line + 1 (* inlined. *) +let ncol d = d.col <- d.col + 1 (* inlined. *) +let ncount d = d.count <- d.count + 1 (* inlined. *) +let cr d b = d.last_cr <- b (* inlined. *) + +let pp_remove_bom utf16 pp d = function(* removes init. BOM, handles UTF-16. *) +| `Uchar 0xFEFF (* BOM *) -> + if utf16 then (d.encoding <- `UTF_16BE; d.k <- decode_utf_16be); + d.removed_bom <- true; d.pp <- pp; d.k d +| `Uchar 0xFFFE (* BOM reversed from decode_utf_16be *) when utf16 -> + d.encoding <- `UTF_16LE; d.k <- decode_utf_16le; + d.removed_bom <- true; d.pp <- pp; d.k d +| `Malformed _ | `Uchar _ as v -> + d.removed_bom <- false; d.pp <- pp; d.pp d v + +let pp_nln_none d = function +| `Uchar 0x000A (* LF *) as v -> + let last_cr = d.last_cr in + cr d false; ncount d; if last_cr then v else (nline d; v) +| `Uchar 0x000D (* CR *) as v -> cr d true; ncount d; nline d; v +| `Uchar (0x0085 | 0x000C | 0x2028 | 0x2029) (* NEL | FF | LS | PS *) as v -> + cr d false; ncount d; nline d; v +| `Uchar _ | `Malformed _ as v -> cr d false; ncount d; ncol d; v + +let pp_nln_readline d = function +| `Uchar 0x000A (* LF *) -> + let last_cr = d.last_cr in + cr d false; if last_cr then d.k d else (ncount d; nline d; `Uchar d.nl) +| `Uchar 0x000D (* CR *) -> cr d true; ncount d; nline d; `Uchar d.nl +| `Uchar (0x0085 | 0x000C | 0x2028 | 0x2029) (* NEL | FF | LS | PS *) -> + cr d false; ncount d; nline d; `Uchar d.nl +| `Uchar _ | `Malformed _ as v -> cr d false; ncount d; ncol d; v + +let pp_nln_nlf d = function +| `Uchar 0x000A (* LF *) -> + let last_cr = d.last_cr in + cr d false; if last_cr then d.k d else (ncount d; nline d; `Uchar d.nl) +| `Uchar 0x000D (* CR *) -> cr d true; ncount d; nline d; `Uchar d.nl +| `Uchar 0x0085 (* NEL *) -> cr d false; ncount d; nline d; `Uchar d.nl +| `Uchar (0x000C | 0x2028 | 0x2029) as v (* FF | LS | PS *) -> + cr d false; ncount d; nline d; v +| `Uchar _ | `Malformed _ as v -> cr d false; ncount d; ncol d; v + +let pp_nln_ascii d = function +| `Uchar 0x000A (* LF *) -> + let last_cr = d.last_cr in + cr d false; if last_cr then d.k d else (ncount d; nline d; `Uchar d.nl) +| `Uchar 0x000D (* CR *) -> cr d true; ncount d; nline d; `Uchar d.nl +| `Uchar (0x0085 | 0x000C | 0x2028 | 0x2029) as v (* NEL | FF | LS | PS *) -> + cr d false; ncount d; nline d; v +| `Uchar _ | `Malformed _ as v -> cr d false; ncount d; ncol d; v + +let decode_fun = function +| `UTF_8 -> decode_utf_8 +| `UTF_16 -> decode_utf_16be (* see [pp_remove_bom]. *) +| `UTF_16BE -> decode_utf_16be +| `UTF_16LE -> decode_utf_16le +| `US_ASCII -> decode_us_ascii +| `ISO_8859_1 -> decode_iso_8859_1 + +let decoder ?nln ?encoding src = + let pp, nl = match nln with + | None -> pp_nln_none, 0x000A (* not used. *) + | Some (`ASCII nl) -> pp_nln_ascii, nl + | Some (`NLF nl) -> pp_nln_nlf, nl + | Some (`Readline nl) -> pp_nln_readline, nl + in + let encoding, k = match encoding with + | None -> `UTF_8, guess_encoding + | Some e -> (e :> decoder_encoding), decode_fun e + in + let i, i_pos, i_max = match src with + | `Manual -> "", 1, 0 (* implies src_rem d = 0. *) + | `Channel _ -> String.create io_buffer_size, 1, 0 (* idem. *) + | `String s -> s, 0, String.length s - 1 + in + { src = (src :> src); encoding; nln = (nln :> nln option); nl; + i; i_pos; i_max; t = String.create 4; t_len = 0; t_need = 0; + removed_bom = false; last_cr = false; line = 1; col = 0; + byte_count = 0; count = 0; + pp = pp_remove_bom (encoding = `UTF_16) pp; k } + +let decode d = d.k d +let decoder_line d = d.line +let decoder_col d = d.col +let decoder_byte_count d = d.byte_count +let decoder_count d = d.count +let decoder_removed_bom d = d.removed_bom +let decoder_src d = d.src +let decoder_nln d = d.nln +let decoder_encoding d = d.encoding +let set_decoder_encoding d e = + d.encoding <- (e :> decoder_encoding); d.k <- decode_fun e + +(* Encode *) + +type dst = [ `Channel of out_channel | `Buffer of Buffer.t | `Manual ] +type encode = [ `Await | `End | `Uchar of uchar ] +type encoder = + { dst : dst; (* output destination. *) + encoding : encoding; (* encoded encoding. *) + mutable o : string; (* current output chunk. *) + mutable o_pos : int; (* next output position to write. *) + mutable o_max : int; (* maximal output position to write. *) + t : string; (* four bytes buffer for overlapping writes. *) + mutable t_pos : int; (* next position to read in [t]. *) + mutable t_max : int; (* maximal position to read in [t]. *) + mutable k : (* encoder continuation. *) + encoder -> encode -> [ `Ok | `Partial ] } + +(* On encodes that overlap two (or more) [e.o] buffers, we encode the + character to the temporary buffer [o.t] and continue with + [tmp_flush] to write this data on the different [e.o] buffers. If + the [e.o] buffers are not too small this is faster than + continuation based byte per byte writes. *) + +let o_rem e = e.o_max - e.o_pos + 1 (* remaining bytes to write in [e.o]. *) +let dst e s j l = (* set [e.o] with [s]. *) + if (j < 0 || l < 0 || j + l > String.length s) then invalid_bounds j l; + e.o <- s; e.o_pos <- j; e.o_max <- j + l - 1 + +let partial k e = function `Await -> k e | `Uchar _ | `End -> invalid_encode () +let flush k e = match e.dst with(* get free storage in [d.o] and [k]ontinue. *) +| `Manual -> e.k <- partial k; `Partial +| `Buffer b -> Buffer.add_substring b e.o 0 e.o_pos; e.o_pos <- 0; k e +| `Channel oc -> output oc e.o 0 e.o_pos; e.o_pos <- 0; k e + +let t_range e max = e.t_pos <- 0; e.t_max <- max +let rec t_flush k e = (* flush [d.t] up to [d.t_max] in [d.i]. *) + let blit e l = + unsafe_blit e.t e.t_pos e.o e.o_pos l; + e.o_pos <- e.o_pos + l; e.t_pos <- e.t_pos + l + in + let rem = o_rem e in + let len = e.t_max - e.t_pos + 1 in + if rem < len then (blit e rem; flush (t_flush k) e) else (blit e len; k e) + +(* Encoders. *) + +let rec encode_utf_8 e v = + let k e = e.k <- encode_utf_8; `Ok in + match v with + | `Await -> k e + | `End -> flush k e + | `Uchar u as v -> + let rem = o_rem e in + if u <= 0x007F then + if rem < 1 then flush (fun e -> encode_utf_8 e v) e else + (unsafe_set_byte e.o e.o_pos u; e.o_pos <- e.o_pos + 1; k e) + else if u <= 0x07FF then + begin + let s, j, k = + if rem < 2 then (t_range e 1; e.t, 0, t_flush k) else + let j = e.o_pos in (e.o_pos <- e.o_pos + 2; e.o, j, k) + in + unsafe_set_byte s j (0xC0 lor (u lsr 6)); + unsafe_set_byte s (j + 1) (0x80 lor (u land 0x3F)); + k e + end + else if u <= 0xFFFF then + begin + let s, j, k = + if rem < 3 then (t_range e 2; e.t, 0, t_flush k) else + let j = e.o_pos in (e.o_pos <- e.o_pos + 3; e.o, j, k) + in + unsafe_set_byte s j (0xE0 lor (u lsr 12)); + unsafe_set_byte s (j + 1) (0x80 lor ((u lsr 6) land 0x3F)); + unsafe_set_byte s (j + 2) (0x80 lor (u land 0x3F)); + k e + end + else + begin + let s, j, k = + if rem < 4 then (t_range e 3; e.t, 0, t_flush k) else + let j = e.o_pos in (e.o_pos <- e.o_pos + 4; e.o, j, k) + in + unsafe_set_byte s j (0xF0 lor (u lsr 18)); + unsafe_set_byte s (j + 1) (0x80 lor ((u lsr 12) land 0x3F)); + unsafe_set_byte s (j + 2) (0x80 lor ((u lsr 6) land 0x3F)); + unsafe_set_byte s (j + 3) (0x80 lor (u land 0x3F)); + k e + end + +let rec encode_utf_16be e v = + let k e = e.k <- encode_utf_16be; `Ok in + match v with + | `Await -> k e + | `End -> flush k e + | `Uchar u -> + let rem = o_rem e in + if u < 0x10000 then + begin + let s, j, k = + if rem < 2 then (t_range e 1; e.t, 0, t_flush k) else + let j = e.o_pos in (e.o_pos <- e.o_pos + 2; e.o, j, k) + in + unsafe_set_byte s j (u lsr 8); + unsafe_set_byte s (j + 1) (u land 0xFF); + k e + end else begin + let s, j, k = + if rem < 4 then (t_range e 3; e.t, 0, t_flush k) else + let j = e.o_pos in (e.o_pos <- e.o_pos + 4; e.o, j, k) + in + let u' = u - 0x10000 in + let hi = (0xD800 lor (u' lsr 10)) in + let lo = (0xDC00 lor (u' land 0x3FF)) in + unsafe_set_byte s j (hi lsr 8); + unsafe_set_byte s (j + 1) (hi land 0xFF); + unsafe_set_byte s (j + 2) (lo lsr 8); + unsafe_set_byte s (j + 3) (lo land 0xFF); + k e + end + +let rec encode_utf_16le e v = (* encode_uft_16be with bytes swapped. *) + let k e = e.k <- encode_utf_16le; `Ok in + match v with + | `Await -> k e + | `End -> flush k e + | `Uchar u -> + let rem = o_rem e in + if u < 0x10000 then + begin + let s, j, k = + if rem < 2 then (t_range e 1; e.t, 0, t_flush k) else + let j = e.o_pos in (e.o_pos <- e.o_pos + 2; e.o, j, k) + in + unsafe_set_byte s j (u land 0xFF); + unsafe_set_byte s (j + 1) (u lsr 8); + k e + end + else + begin + let s, j, k = + if rem < 4 then (t_range e 3; e.t, 0, t_flush k) else + let j = e.o_pos in (e.o_pos <- e.o_pos + 4; e.o, j, k) + in + let u' = u - 0x10000 in + let hi = (0xD800 lor (u' lsr 10)) in + let lo = (0xDC00 lor (u' land 0x3FF)) in + unsafe_set_byte s j (hi land 0xFF); + unsafe_set_byte s (j + 1) (hi lsr 8); + unsafe_set_byte s (j + 2) (lo land 0xFF); + unsafe_set_byte s (j + 3) (lo lsr 8); + k e + end + +let encode_fun = function +| `UTF_8 -> encode_utf_8 +| `UTF_16 -> encode_utf_16be +| `UTF_16BE -> encode_utf_16be +| `UTF_16LE -> encode_utf_16le + +let encoder encoding dst = + let o, o_pos, o_max = match dst with + | `Manual -> "", 1, 0 (* implies o_rem e = 0. *) + | `Buffer _ + | `Channel _ -> String.create io_buffer_size, 0, io_buffer_size - 1 + in + { dst = (dst :> dst); encoding = (encoding :> encoding); o; o_pos; o_max; + t = String.create 4; t_pos = 1; t_max = 0; k = encode_fun encoding} + +let encode e v = e.k e (v :> encode) +let encoder_encoding e = e.encoding +let encoder_dst e = e.dst + +(* Manual sources and destinations. *) + +module Manual = struct + let src = src + let dst = dst + let dst_rem = o_rem +end + +(* Strings folders and Buffer encoders *) + +module String = struct + let encoding_guess s = match r_encoding s 0 (max (String.length s) 3) with + | `UTF_8 d -> `UTF_8, (d = `BOM) + | `UTF_16BE d -> `UTF_16BE, (d = `BOM) + | `UTF_16LE d -> `UTF_16LE, (d = `BOM) + + type 'a folder = + 'a -> int -> [ `Uchar of uchar | `Malformed of string ] -> 'a + + let fold_utf_8 f acc s = + let rec loop acc f s i l = + if i = l then acc else + let need = unsafe_array_get utf_8_len (unsafe_byte s i) in + if need = 0 then loop (f acc i (malformed s i 1)) f s (i + 1) l else + let rem = l - i in + if rem < need then f acc i (malformed s i rem) else + loop (f acc i (r_utf_8 s i need)) f s (i + need) l + in + loop acc f s 0 (String.length s) + + let fold_utf_16be f acc s = + let rec loop acc f s i l = + if i = l then acc else + let rem = l - i in + if rem < 2 then f acc i (malformed s i 1) else + match r_utf_16 s i (i + 1) with + | `Uchar _ | `Malformed _ as v -> loop (f acc i v) f s (i + 2) l + | `Hi hi -> + if rem < 4 then f acc i (malformed s i rem) else + loop (f acc i (r_utf_16_lo hi s (i + 2) (i + 3))) f s (i + 4) l + in + loop acc f s 0 (String.length s) + + let fold_utf_16le f acc s = (* [fold_utf_16be], bytes swapped. *) + let rec loop acc f s i l = + if i = l then acc else + let rem = l - i in + if rem < 2 then f acc i (malformed s i 1) else + match r_utf_16 s (i + 1) i with + | `Uchar _ | `Malformed _ as v -> loop (f acc i v) f s (i + 2) l + | `Hi hi -> + if rem < 4 then f acc i (malformed s i rem) else + loop (f acc i (r_utf_16_lo hi s (i + 3) (i + 2))) f s (i + 4) l + in + loop acc f s 0 (String.length s) +end + +module Buffer = struct + let add_utf_8 b u = + let w byte = Buffer.add_char b (unsafe_chr byte) in (* inlined. *) + if u <= 0x007F then + (w u) + else if u <= 0x07FF then + (w (0xC0 lor (u lsr 6)); + w (0x80 lor (u land 0x3F))) + else if u <= 0xFFFF then + (w (0xE0 lor (u lsr 12)); + w (0x80 lor ((u lsr 6) land 0x3F)); + w (0x80 lor (u land 0x3F))) + else + (w (0xF0 lor (u lsr 18)); + w (0x80 lor ((u lsr 12) land 0x3F)); + w (0x80 lor ((u lsr 6) land 0x3F)); + w (0x80 lor (u land 0x3F))) + + let add_utf_16be b u = + let w byte = Buffer.add_char b (unsafe_chr byte) in (* inlined. *) + if u < 0x10000 then (w (u lsr 8); w (u land 0xFF)) else + let u' = u - 0x10000 in + let hi = (0xD800 lor (u' lsr 10)) in + let lo = (0xDC00 lor (u' land 0x3FF)) in + w (hi lsr 8); w (hi land 0xFF); + w (lo lsr 8); w (lo land 0xFF) + + let add_utf_16le b u = (* swapped add_utf_16be. *) + let w byte = Buffer.add_char b (unsafe_chr byte) in (* inlined. *) + if u < 0x10000 then (w (u land 0xFF); w (u lsr 8)) else + let u' = u - 0x10000 in + let hi = (0xD800 lor (u' lsr 10)) in + let lo = (0xDC00 lor (u' land 0x3FF)) in + w (hi land 0xFF); w (hi lsr 8); + w (lo land 0xFF); w (lo lsr 8) +end + +(*--------------------------------------------------------------------------- + Copyright 2012 Daniel C. Bünzli + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + 3. Neither the name of Daniel C. Bünzli nor the names of + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ---------------------------------------------------------------------------*)