Disambiguate between OCaml and Standard ML

Fix #2208
2026-07-01 00:24:26 +00:00 · 2015-03-13 15:03:48 +00:00
parent b5472ab753
commit e79607372b
9 changed files with 5252 additions and 0 deletions
--- a/lib/linguist/heuristics.rb
+++ b/lib/linguist/heuristics.rb
@@ -261,5 +261,14 @@ module Linguist
        Language["Makefile"]
      end
    end
+
+    disambiguate "OCaml", "Standard ML" do |data|
+      if /module|let rec |match\s+(\S+\s)+with/.match(data)
+        Language["OCaml"]
+      elsif /=> |case\s+(\S+\s)+of/.match(data)
+        Language["Standard ML"]
+      end
+    end
+
  end
 end
--- a/samples/OCaml/cmdliner.ml
+++ b/samples/OCaml/cmdliner.ml
--- a/samples/OCaml/common.ml
+++ b/samples/OCaml/common.ml
@@ -0,0 +1,14 @@
+(*
+ * Copyright (c) 2013 Jeremy Yallop.
+ *
+ * This file is distributed under the terms of the MIT License.
+ * See the file LICENSE for details.
+ *)
+
+let string_of format v = 
+  let buf = Buffer.create 100 in
+  let fmt = Format.formatter_of_buffer buf in begin
+    format fmt v;
+    Format.pp_print_flush fmt ();
+    Buffer.contents buf
+  end
--- a/samples/OCaml/date.ml
+++ b/samples/OCaml/date.ml
@@ -0,0 +1,40 @@
+(*
+ * Copyright (c) 2013 Jeremy Yallop.
+ *
+ * This file is distributed under the terms of the MIT License.
+ * See the file LICENSE for details.
+ *)
+
+open Ctypes
+open PosixTypes
+open Foreign
+
+type tm
+let tm = structure "tm"
+let (-:) ty label = field tm label ty
+let tm_sec   = int -: "tm_sec"   (* seconds *)
+let tm_min   = int -: "tm_min"   (* minutes *)
+let tm_hour  = int -: "tm_hour"  (* hours *)
+let tm_mday  = int -: "tm_mday"  (* day of the month *)
+let tm_mon   = int -: "tm_mon"   (* month *)
+let tm_year  = int -: "tm_year"  (* year *)
+let tm_wday  = int -: "tm_wday"  (* day of the week *)
+let tm_yday  = int -: "tm_yday"  (* day in the year *)
+let tm_isdst = int -: "tm_isdst" (* daylight saving time *)
+let () = seal (tm : tm structure typ)
+
+let time = foreign "time" ~check_errno:true (ptr time_t @-> returning time_t)
+
+let asctime = foreign "asctime" (ptr tm @-> returning string)
+
+let localtime = foreign "localtime" (ptr time_t @-> returning (ptr tm))
+
+let () = begin
+  let timep = allocate_n ~count:1 time_t in
+  let time = time timep in
+  assert (time = !@timep);
+  let tm = localtime timep in
+  Printf.printf "tm.tm_mon  = %d\n" (getf !@tm tm_mon);
+  Printf.printf "tm.tm_year = %d\n" (getf !@tm tm_year);
+  print_endline (asctime tm)
+end
--- a/samples/OCaml/map.ml
+++ b/samples/OCaml/map.ml
@@ -0,0 +1,337 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
+(*                                                                     *)
+(*  Copyright 1996 Institut National de Recherche en Informatique et   *)
+(*  en Automatique.  All rights reserved.  This file is distributed    *)
+(*  under the terms of the GNU Library General Public License, with    *)
+(*  the special exception on linking described in file ../LICENSE.     *)
+(*                                                                     *)
+(***********************************************************************)
+
+module type OrderedType =
+  sig
+    type t
+    val compare: t -> t -> int
+  end
+
+module type S =
+  sig
+    type key
+    type +'a t
+    val empty: 'a t
+    val is_empty: 'a t -> bool
+    val mem:  key -> 'a t -> bool
+    val add: key -> 'a -> 'a t -> 'a t
+    val singleton: key -> 'a -> 'a t
+    val remove: key -> 'a t -> 'a t
+    val merge:
+          (key -> 'a option -> 'b option -> 'c option) -> 'a t -> 'b t -> 'c t
+    val compare: ('a -> 'a -> int) -> 'a t -> 'a t -> int
+    val equal: ('a -> 'a -> bool) -> 'a t -> 'a t -> bool
+    val iter: (key -> 'a -> unit) -> 'a t -> unit
+    val fold: (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b
+    val for_all: (key -> 'a -> bool) -> 'a t -> bool
+    val exists: (key -> 'a -> bool) -> 'a t -> bool
+    val filter: (key -> 'a -> bool) -> 'a t -> 'a t
+    val partition: (key -> 'a -> bool) -> 'a t -> 'a t * 'a t
+    val cardinal: 'a t -> int
+    val bindings: 'a t -> (key * 'a) list
+    val min_binding: 'a t -> (key * 'a)
+    val max_binding: 'a t -> (key * 'a)
+    val choose: 'a t -> (key * 'a)
+    val split: key -> 'a t -> 'a t * 'a option * 'a t
+    val find: key -> 'a t -> 'a
+    val map: ('a -> 'b) -> 'a t -> 'b t
+    val mapi: (key -> 'a -> 'b) -> 'a t -> 'b t
+  end
+
+module Make(Ord: OrderedType) = struct
+
+    type key = Ord.t
+
+    type 'a t =
+        Empty
+      | Node of 'a t * key * 'a * 'a t * int
+
+    let height = function
+        Empty -> 0
+      | Node(_,_,_,_,h) -> h
+
+    let create l x d r =
+      let hl = height l and hr = height r in
+      Node(l, x, d, r, (if hl >= hr then hl + 1 else hr + 1))
+
+    let singleton x d = Node(Empty, x, d, Empty, 1)
+
+    let bal l x d r =
+      let hl = match l with Empty -> 0 | Node(_,_,_,_,h) -> h in
+      let hr = match r with Empty -> 0 | Node(_,_,_,_,h) -> h in
+      if hl > hr + 2 then begin
+        match l with
+          Empty -> invalid_arg "Map.bal"
+        | Node(ll, lv, ld, lr, _) ->
+            if height ll >= height lr then
+              create ll lv ld (create lr x d r)
+            else begin
+              match lr with
+                Empty -> invalid_arg "Map.bal"
+              | Node(lrl, lrv, lrd, lrr, _)->
+                  create (create ll lv ld lrl) lrv lrd (create lrr x d r)
+            end
+      end else if hr > hl + 2 then begin
+        match r with
+          Empty -> invalid_arg "Map.bal"
+        | Node(rl, rv, rd, rr, _) ->
+            if height rr >= height rl then
+              create (create l x d rl) rv rd rr
+            else begin
+              match rl with
+                Empty -> invalid_arg "Map.bal"
+              | Node(rll, rlv, rld, rlr, _) ->
+                  create (create l x d rll) rlv rld (create rlr rv rd rr)
+            end
+      end else
+        Node(l, x, d, r, (if hl >= hr then hl + 1 else hr + 1))
+
+    let empty = Empty
+
+    let is_empty = function Empty -> true | _ -> false
+
+    let rec add x data = function
+        Empty ->
+          Node(Empty, x, data, Empty, 1)
+      | Node(l, v, d, r, h) ->
+          let c = Ord.compare x v in
+          if c = 0 then
+            Node(l, x, data, r, h)
+          else if c < 0 then
+            bal (add x data l) v d r
+          else
+            bal l v d (add x data r)
+
+    let rec find x = function
+        Empty ->
+          raise Not_found
+      | Node(l, v, d, r, _) ->
+          let c = Ord.compare x v in
+          if c = 0 then d
+          else find x (if c < 0 then l else r)
+
+    let rec mem x = function
+        Empty ->
+          false
+      | Node(l, v, d, r, _) ->
+          let c = Ord.compare x v in
+          c = 0 || mem x (if c < 0 then l else r)
+
+    let rec min_binding = function
+        Empty -> raise Not_found
+      | Node(Empty, x, d, r, _) -> (x, d)
+      | Node(l, x, d, r, _) -> min_binding l
+
+    let rec max_binding = function
+        Empty -> raise Not_found
+      | Node(l, x, d, Empty, _) -> (x, d)
+      | Node(l, x, d, r, _) -> max_binding r
+
+    let rec remove_min_binding = function
+        Empty -> invalid_arg "Map.remove_min_elt"
+      | Node(Empty, x, d, r, _) -> r
+      | Node(l, x, d, r, _) -> bal (remove_min_binding l) x d r
+
+    let merge t1 t2 =
+      match (t1, t2) with
+        (Empty, t) -> t
+      | (t, Empty) -> t
+      | (_, _) ->
+          let (x, d) = min_binding t2 in
+          bal t1 x d (remove_min_binding t2)
+
+    let rec remove x = function
+        Empty ->
+          Empty
+      | Node(l, v, d, r, h) ->
+          let c = Ord.compare x v in
+          if c = 0 then
+            merge l r
+          else if c < 0 then
+            bal (remove x l) v d r
+          else
+            bal l v d (remove x r)
+
+    let rec iter f = function
+        Empty -> ()
+      | Node(l, v, d, r, _) ->
+          iter f l; f v d; iter f r
+
+    let rec map f = function
+        Empty ->
+          Empty
+      | Node(l, v, d, r, h) ->
+          let l' = map f l in
+          let d' = f d in
+          let r' = map f r in
+          Node(l', v, d', r', h)
+
+    let rec mapi f = function
+        Empty ->
+          Empty
+      | Node(l, v, d, r, h) ->
+          let l' = mapi f l in
+          let d' = f v d in
+          let r' = mapi f r in
+          Node(l', v, d', r', h)
+
+    let rec fold f m accu =
+      match m with
+        Empty -> accu
+      | Node(l, v, d, r, _) ->
+          fold f r (f v d (fold f l accu))
+
+    let rec for_all p = function
+        Empty -> true
+      | Node(l, v, d, r, _) -> p v d && for_all p l && for_all p r
+
+    let rec exists p = function
+        Empty -> false
+      | Node(l, v, d, r, _) -> p v d || exists p l || exists p r
+
+    (* Beware: those two functions assume that the added k is *strictly*
+       smaller (or bigger) than all the present keys in the tree; it
+       does not test for equality with the current min (or max) key.
+
+       Indeed, they are only used during the "join" operation which
+       respects this precondition.
+    *)
+
+    let rec add_min_binding k v = function
+      | Empty -> singleton k v
+      | Node (l, x, d, r, h) ->
+        bal (add_min_binding k v l) x d r
+
+    let rec add_max_binding k v = function
+      | Empty -> singleton k v
+      | Node (l, x, d, r, h) ->
+        bal l x d (add_max_binding k v r)
+
+    (* Same as create and bal, but no assumptions are made on the
+       relative heights of l and r. *)
+
+    let rec join l v d r =
+      match (l, r) with
+        (Empty, _) -> add_min_binding v d r
+      | (_, Empty) -> add_max_binding v d l
+      | (Node(ll, lv, ld, lr, lh), Node(rl, rv, rd, rr, rh)) ->
+          if lh > rh + 2 then bal ll lv ld (join lr v d r) else
+          if rh > lh + 2 then bal (join l v d rl) rv rd rr else
+          create l v d r
+
+    (* Merge two trees l and r into one.
+       All elements of l must precede the elements of r.
+       No assumption on the heights of l and r. *)
+
+    let concat t1 t2 =
+      match (t1, t2) with
+        (Empty, t) -> t
+      | (t, Empty) -> t
+      | (_, _) ->
+          let (x, d) = min_binding t2 in
+          join t1 x d (remove_min_binding t2)
+
+    let concat_or_join t1 v d t2 =
+      match d with
+      | Some d -> join t1 v d t2
+      | None -> concat t1 t2
+
+    let rec split x = function
+        Empty ->
+          (Empty, None, Empty)
+      | Node(l, v, d, r, _) ->
+          let c = Ord.compare x v in
+          if c = 0 then (l, Some d, r)
+          else if c < 0 then
+            let (ll, pres, rl) = split x l in (ll, pres, join rl v d r)
+          else
+            let (lr, pres, rr) = split x r in (join l v d lr, pres, rr)
+
+    let rec merge f s1 s2 =
+      match (s1, s2) with
+        (Empty, Empty) -> Empty
+      | (Node (l1, v1, d1, r1, h1), _) when h1 >= height s2 ->
+          let (l2, d2, r2) = split v1 s2 in
+          concat_or_join (merge f l1 l2) v1 (f v1 (Some d1) d2) (merge f r1 r2)
+      | (_, Node (l2, v2, d2, r2, h2)) ->
+          let (l1, d1, r1) = split v2 s1 in
+          concat_or_join (merge f l1 l2) v2 (f v2 d1 (Some d2)) (merge f r1 r2)
+      | _ ->
+          assert false
+
+    let rec filter p = function
+        Empty -> Empty
+      | Node(l, v, d, r, _) ->
+          (* call [p] in the expected left-to-right order *)
+          let l' = filter p l in
+          let pvd = p v d in
+          let r' = filter p r in
+          if pvd then join l' v d r' else concat l' r'
+
+    let rec partition p = function
+        Empty -> (Empty, Empty)
+      | Node(l, v, d, r, _) ->
+          (* call [p] in the expected left-to-right order *)
+          let (lt, lf) = partition p l in
+          let pvd = p v d in
+          let (rt, rf) = partition p r in
+          if pvd
+          then (join lt v d rt, concat lf rf)
+          else (concat lt rt, join lf v d rf)
+
+    type 'a enumeration = End | More of key * 'a * 'a t * 'a enumeration
+
+    let rec cons_enum m e =
+      match m with
+        Empty -> e
+      | Node(l, v, d, r, _) -> cons_enum l (More(v, d, r, e))
+
+    let compare cmp m1 m2 =
+      let rec compare_aux e1 e2 =
+          match (e1, e2) with
+          (End, End) -> 0
+        | (End, _)  -> -1
+        | (_, End) -> 1
+        | (More(v1, d1, r1, e1), More(v2, d2, r2, e2)) ->
+            let c = Ord.compare v1 v2 in
+            if c <> 0 then c else
+            let c = cmp d1 d2 in
+            if c <> 0 then c else
+            compare_aux (cons_enum r1 e1) (cons_enum r2 e2)
+      in compare_aux (cons_enum m1 End) (cons_enum m2 End)
+
+    let equal cmp m1 m2 =
+      let rec equal_aux e1 e2 =
+          match (e1, e2) with
+          (End, End) -> true
+        | (End, _)  -> false
+        | (_, End) -> false
+        | (More(v1, d1, r1, e1), More(v2, d2, r2, e2)) ->
+            Ord.compare v1 v2 = 0 && cmp d1 d2 &&
+            equal_aux (cons_enum r1 e1) (cons_enum r2 e2)
+      in equal_aux (cons_enum m1 End) (cons_enum m2 End)
+
+    let rec cardinal = function
+        Empty -> 0
+      | Node(l, _, _, r, _) -> cardinal l + 1 + cardinal r
+
+    let rec bindings_aux accu = function
+        Empty -> accu
+      | Node(l, v, d, r, _) -> bindings_aux ((v, d) :: bindings_aux accu r) l
+
+    let bindings s =
+      bindings_aux [] s
+
+    let choose = min_binding
+
+end
--- a/samples/OCaml/mirage.ml
+++ b/samples/OCaml/mirage.ml
--- a/samples/OCaml/reload.ml
+++ b/samples/OCaml/reload.ml
@@ -0,0 +1,125 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
+(*                                                                     *)
+(*  Copyright 2000 Institut National de Recherche en Informatique et   *)
+(*  en Automatique.  All rights reserved.  This file is distributed    *)
+(*  under the terms of the Q Public License version 1.0.               *)
+(*                                                                     *)
+(***********************************************************************)
+
+open Cmm
+open Arch
+open Reg
+open Mach
+
+(* Reloading for the AMD64 *)
+
+(* Summary of instruction set constraints:
+   "S" means either stack or register, "R" means register only.
+   Operation                    Res     Arg1    Arg2
+     Imove                      R       S
+                             or S       R
+     Iconst_int                 S if 32-bit signed, R otherwise
+     Iconst_float               R
+     Iconst_symbol (not PIC)    S
+     Iconst_symbol (PIC)        R
+     Icall_ind                          R
+     Itailcall_ind                      R
+     Iload                      R       R       R
+     Istore                             R       R
+     Iintop(Icomp)              R       R       S
+                            or  S       S       R
+     Iintop(Imul|Idiv|mod)      R       R       S
+     Iintop(shift)              S       S       R
+     Iintop(others)             R       R       S
+                            or  S       S       R
+     Iintop_imm(Iadd, n)/lea    R       R
+     Iintop_imm(others)         S       S
+     Inegf...Idivf              R       R       S
+     Ifloatofint                R       S
+     Iintoffloat                R       S
+     Ispecific(Ilea)            R       R       R
+     Ispecific(Ifloatarithmem)  R       R       R
+
+   Conditional branches:
+     Iinttest                           S       R
+                                    or  R       S
+     Ifloattest                         R       S    (or  S R if swapped test)
+     other tests                        S
+*)
+
+let stackp r =
+  match r.loc with
+    Stack _ -> true
+  | _ -> false
+
+class reload = object (self)
+
+inherit Reloadgen.reload_generic as super
+
+method! reload_operation op arg res =
+  match op with
+  | Iintop(Iadd|Isub|Iand|Ior|Ixor|Icomp _|Icheckbound) ->
+      (* One of the two arguments can reside in the stack, but not both *)
+      if stackp arg.(0) && stackp arg.(1)
+      then ([|arg.(0); self#makereg arg.(1)|], res)
+      else (arg, res)
+  | Iintop_imm(Iadd, _) when arg.(0).loc <> res.(0).loc ->
+      (* This add will be turned into a lea; args and results must be
+         in registers *)
+      super#reload_operation op arg res
+  | Iintop(Idiv | Imod | Ilsl | Ilsr | Iasr)
+  | Iintop_imm(_, _) ->
+      (* The argument(s) and results can be either in register or on stack *)
+      (* Note: Idiv, Imod: arg(0) and res(0) already forced in regs
+               Ilsl, Ilsr, Iasr: arg(1) already forced in regs *)
+      (arg, res)
+  | Iintop(Imul) | Iaddf | Isubf | Imulf | Idivf ->
+      (* First argument (= result) must be in register, second arg
+         can reside in the stack *)
+      if stackp arg.(0)
+      then (let r = self#makereg arg.(0) in ([|r; arg.(1)|], [|r|]))
+      else (arg, res)
+  | Ifloatofint | Iintoffloat ->
+      (* Result must be in register, but argument can be on stack *)
+      (arg, (if stackp res.(0) then [| self#makereg res.(0) |] else res))
+  | Iconst_int n ->
+      if n <= 0x7FFFFFFFn && n >= -0x80000000n
+      then (arg, res)
+      else super#reload_operation op arg res
+  | Iconst_symbol _ ->
+      if !pic_code || !Clflags.dlcode
+      then super#reload_operation op arg res
+      else (arg, res)
+  | _ -> (* Other operations: all args and results in registers *)
+      super#reload_operation op arg res
+
+method! reload_test tst arg =
+  match tst with
+    Iinttest cmp ->
+      (* One of the two arguments can reside on stack *)
+      if stackp arg.(0) && stackp arg.(1)
+      then [| self#makereg arg.(0); arg.(1) |]
+      else arg
+  | Ifloattest((Clt|Cle), _) ->
+      (* Cf. emit.mlp: we swap arguments in this case *)
+      (* First argument can be on stack, second must be in register *)
+      if stackp arg.(1)
+      then [| arg.(0); self#makereg arg.(1) |]
+      else arg
+  | Ifloattest((Ceq|Cne|Cgt|Cge), _) ->
+      (* Second argument can be on stack, first must be in register *)
+      if stackp arg.(0)
+      then [| self#makereg arg.(0); arg.(1) |]
+      else arg
+  | _ ->
+      (* The argument(s) can be either in register or on stack *)
+      arg
+
+end
+
+let fundecl f =
+  (new reload)#fundecl f
--- a/samples/OCaml/sigset.ml
+++ b/samples/OCaml/sigset.ml
@@ -0,0 +1,70 @@
+(*
+ * Copyright (c) 2013 Jeremy Yallop.
+ *
+ * This file is distributed under the terms of the MIT License.
+ * See the file LICENSE for details.
+ *)
+
+open PosixTypes
+open Ctypes
+open Foreign
+
+type t = sigset_t ptr
+
+let t = ptr sigset_t
+
+(* This function initializes the signal set set to exclude all of the defined
+   signals. It always returns 0.  *)
+let sigemptyset = foreign "sigemptyset" (ptr sigset_t @-> returning int)
+
+let empty () =
+  let setp = allocate_n ~count:1 sigset_t in begin
+    ignore (sigemptyset setp);
+    setp
+  end
+
+(* This function initializes the signal set set to include all of the defined
+   signals. Again, the return value is 0. *)
+let sigfillset = foreign "sigfillset" (ptr sigset_t @-> returning int)
+
+let full () =
+  let setp = allocate_n ~count:1 sigset_t in begin
+    ignore (sigfillset setp);
+    setp
+  end
+
+(* This function adds the signal signum to the signal set set. All sigaddset
+   does is modify set; it does not block or unblock any signals.
+
+   The return value is 0 on success and -1 on failure. The following errno
+   error condition is defined for this function:
+
+   EINVAL The signum argument doesn't specify a valid signal. 
+*)
+let sigaddset = foreign "sigaddset" ~check_errno:true
+  (ptr sigset_t @-> int @-> returning int)
+
+let add set signal = ignore (sigaddset set signal)
+
+(* This function removes the signal signum from the signal set set. All
+   sigdelset does is modify set; it does not block or unblock any signals.
+
+   The return value and error conditions are the same as for
+   sigaddset.  *)
+let sigdelset = foreign "sigdelset" ~check_errno:true
+  (ptr sigset_t @-> int @-> returning int)
+
+let del set signal = ignore (sigdelset set signal)
+
+(* The sigismember function tests whether the signal signum is a member of the
+   signal set set. It returns 1 if the signal is in the set, 0 if not, and -1 if
+   there is an error.
+
+   The following errno error condition is defined for this function:
+
+   EINVAL The signum argument doesn't specify a valid signal. 
+*)
+let sigismember = foreign "sigismember" ~check_errno:true
+  (ptr sigset_t @-> int @-> returning int)
+
+let mem set signal = sigismember set signal <> 0
--- a/samples/OCaml/uutf.ml
+++ b/samples/OCaml/uutf.ml
@@ -0,0 +1,810 @@
+(*---------------------------------------------------------------------------
+   Copyright 2012 Daniel C. Bünzli. All rights reserved.
+   Distributed under the BSD3 license, see license at the end of the file.
+   %%NAME%% release %%VERSION%%
+  ---------------------------------------------------------------------------*)
+
+let io_buffer_size = 65536                           (* IO_BUFFER_SIZE 4.0.0 *)
+
+let pp = Format.fprintf
+let invalid_encode () = invalid_arg "expected `Await encode"
+let invalid_bounds j l =
+  invalid_arg (Printf.sprintf "invalid bounds (index %d, length %d)" j l)
+
+(* Unsafe string byte manipulations. If you don't believe the author's
+   invariants, replacing with safe versions makes everything safe in
+   the module. He won't be upset. *)
+
+let unsafe_chr = Char.unsafe_chr
+let unsafe_blit = String.unsafe_blit
+let unsafe_array_get = Array.unsafe_get
+let unsafe_byte s j = Char.code (String.unsafe_get s j)
+let unsafe_set_byte s j byte = String.unsafe_set s j (Char.unsafe_chr byte)
+
+(* Unicode characters *)
+
+type uchar = int
+let u_bom = 0xFEFF                                                   (* BOM. *)
+let u_rep = 0xFFFD                                 (* replacement character. *)
+let is_uchar cp =
+  (0x0000 <= cp && cp <= 0xD7FF) || (0xE000 <= cp && cp <= 0x10FFFF)
+
+let pp_cp ppf cp =
+  if cp < 0 || cp > 0x10FFFF then pp ppf "U+Invalid(%X)" cp else
+  if cp <= 0xFFFF then pp ppf "U+%04X" cp else
+  pp ppf "U+%X" cp
+
+let cp_to_string cp =                                    (* NOT thread safe. *)
+  pp Format.str_formatter "%a" pp_cp cp; Format.flush_str_formatter ()
+
+(* Unicode encoding schemes *)
+
+type encoding = [ `UTF_8 | `UTF_16 | `UTF_16BE | `UTF_16LE ]
+type decoder_encoding = [ encoding | `US_ASCII | `ISO_8859_1 ]
+
+let encoding_of_string s = match String.uppercase s with      (* IANA names. *)
+| "UTF-8" -> Some `UTF_8
+| "UTF-16" -> Some `UTF_16
+| "UTF-16LE" -> Some `UTF_16LE
+| "UTF-16BE" -> Some `UTF_16BE
+| "ANSI_X3.4-1968" | "ISO-IR-6" | "ANSI_X3.4-1986" | "ISO_646.IRV:1991"
+| "ASCII" | "ISO646-US" | "US-ASCII" | "US" | "IBM367" | "CP367" | "CSASCII" ->
+    Some `US_ASCII
+| "ISO_8859-1:1987" | "ISO-IR-100" | "ISO_8859-1" | "ISO-8859-1"
+| "LATIN1" | "L1" | "IBM819" | "CP819" | "CSISOLATIN1" ->
+    Some `ISO_8859_1
+| _ -> None
+
+let encoding_to_string = function
+| `UTF_8 -> "UTF-8" | `UTF_16 -> "UTF-16" | `UTF_16BE -> "UTF-16BE"
+| `UTF_16LE -> "UTF-16LE" | `US_ASCII -> "US-ASCII"
+| `ISO_8859_1 -> "ISO-8859-1"
+
+(* Base character decoders. They assume enough data. *)
+
+let malformed s j l = `Malformed (String.sub s j l)
+let malformed_pair be hi s j l =    (* missing or half low surrogate at eoi. *)
+  let bs1 = String.sub s j l in
+  let bs0 = String.create 2 in
+  let j0, j1 = if be then (0, 1) else (1, 0) in
+  unsafe_set_byte bs0 j0 (hi lsr 8);
+  unsafe_set_byte bs0 j1 (hi land 0xFF);
+  `Malformed (bs0 ^ bs1)
+
+let r_us_ascii s j =
+  (* assert (0 <= j && j < String.length s); *)
+  let b0 = unsafe_byte s j in
+  if b0 <= 127 then `Uchar b0 else malformed s j 1
+
+let r_iso_8859_1 s j =
+  (* assert (0 <= j && j < String.length s); *)
+  `Uchar (unsafe_byte s j)
+
+let utf_8_len = [| (* uchar byte length according to first UTF-8 byte. *)
+  1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1;
+  1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1;
+  1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1;
+  1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1;
+  1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1;
+  1; 1; 1; 1; 1; 1; 1; 1; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
+  0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
+  0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
+  0; 0; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2;
+  2; 2; 2; 2; 2; 2; 2; 2; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3;
+  4; 4; 4; 4; 4; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0 |]
+
+let r_utf_8 s j l =
+  (* assert (0 <= j && 0 <= l && j + l <= String.length s); *)
+  match l with
+  | 1 -> `Uchar (unsafe_byte s j)
+  | 2 ->
+      let b0 = unsafe_byte s j in let b1 = unsafe_byte s (j + 1) in
+      if b1 lsr 6 != 0b10 then malformed s j l else
+      `Uchar (((b0 land 0x1F) lsl 6) lor (b1 land 0x3F))
+  | 3 ->
+      let b0 = unsafe_byte s j in let b1 = unsafe_byte s (j + 1) in
+      let b2 = unsafe_byte s (j + 2) in
+      let c = `Uchar (((b0 land 0x0F) lsl 12) lor
+                      ((b1 land 0x3F) lsl 6) lor
+          (b2 land 0x3F))
+      in
+      if b2 lsr 6 != 0b10 then malformed s j l else
+      begin match b0 with
+      | 0xE0 -> if b1 < 0xA0 || 0xBF < b1 then malformed s j l else c
+      | 0xED -> if b1 < 0x80 || 0x9F < b1 then malformed s j l else c
+      | _ -> if b1 lsr 6 != 0b10 then malformed s j l else c
+      end
+  | 4 ->
+      let b0 = unsafe_byte s j in let b1 = unsafe_byte s (j + 1) in
+      let b2 = unsafe_byte s (j + 2) in let b3 = unsafe_byte s (j + 3) in
+      let c = `Uchar (((b0 land 0x07) lsl 18) lor
+                      ((b1 land 0x3F) lsl 12) lor
+          ((b2 land 0x3F) lsl 6) lor
+                      (b3 land 0x3F))
+      in
+      if b3 lsr 6 != 0b10 || b2 lsr 6 != 0b10 then malformed s j l else
+      begin match b0 with
+      | 0xF0 -> if b1 < 0x90 || 0xBF < b1 then malformed s j l else c
+      | 0xF4 -> if b1 < 0x80 || 0x8F < b1 then malformed s j l else c
+      | _ -> if b1 lsr 6 != 0b10 then malformed s j l else c
+      end
+  | _ -> assert false
+
+let r_utf_16 s j0 j1 =                       (* May return a high surrogate. *)
+  (* assert (0 <= j0 && 0 <= j1 && max j0 j1 < String.length s); *)
+  let b0 = unsafe_byte s j0 in let b1 = unsafe_byte s j1 in
+  let u = (b0 lsl 8) lor b1 in
+  if u < 0xD800 || u > 0xDFFF then `Uchar u else
+  if u > 0xDBFF then malformed s (min j0 j1) 2 else `Hi u
+
+let r_utf_16_lo hi s j0 j1 =          (* Combines [hi] with a low surrogate. *)
+  (* assert (0 <= j0 && 0 <= j1 && max j0 j1 < String.length s); *)
+  let b0 = unsafe_byte s j0 in
+  let b1 = unsafe_byte s j1 in
+  let lo = (b0 lsl 8) lor b1 in
+  if lo < 0xDC00 || lo > 0xDFFF
+  then malformed_pair (j0 < j1 (* true => be *)) hi s (min j0 j1) 2
+  else `Uchar ((((hi land 0x3FF) lsl 10) lor (lo land 0x3FF)) + 0x10000)
+
+let r_encoding s j l =                  (* guess encoding with max. 3 bytes. *)
+  (* assert (0 <= j && 0 <= l && j + l <= String.length s) *)
+  let some i = if i < l then Some (unsafe_byte s (j + i)) else None in
+  match (some 0), (some 1), (some 2) with
+  | Some 0xEF, Some 0xBB, Some 0xBF                 -> `UTF_8 `BOM
+  | Some 0xFE, Some 0xFF, _                         -> `UTF_16BE `BOM
+  | Some 0xFF, Some 0xFE, _                         -> `UTF_16LE `BOM
+  | Some 0x00, Some    p, _              when p > 0 -> `UTF_16BE (`ASCII p)
+  | Some    p, Some 0x00, _              when p > 0 -> `UTF_16LE (`ASCII p)
+  | Some    u,         _, _ when utf_8_len.(u) <> 0 -> `UTF_8 `Decode
+  | Some    _, Some    _, _                         -> `UTF_16BE `Decode
+  | Some    _, None     , None                      -> `UTF_8 `Decode
+  | None     , None     , None                      -> `UTF_8 `End
+  | None     , Some    _, _                         -> assert false
+  | Some    _, None     , Some _                    -> assert false
+  | None     , None     , Some _                    -> assert false
+
+(* Decode *)
+
+type src = [ `Channel of in_channel | `String of string | `Manual ]
+type nln = [ `ASCII of uchar | `NLF of uchar | `Readline of uchar ]
+type decode = [ `Await | `End | `Malformed of string | `Uchar of uchar]
+
+let pp_decode ppf = function
+| `Uchar u -> pp ppf "@[`Uchar %a@]" pp_cp u
+| `End -> pp ppf "`End"
+| `Await -> pp ppf "`Await"
+| `Malformed bs ->
+    let l = String.length bs in
+    pp ppf "@[`Malformed (";
+    if l > 0 then pp ppf "%02X" (Char.code (bs.[0]));
+    for i = 1 to l - 1 do pp ppf " %02X" (Char.code (bs.[i])) done;
+    pp ppf ")@]"
+
+type decoder =
+  { src : src;                                              (* input source. *)
+    mutable encoding : decoder_encoding;                (* decoded encoding. *)
+    nln : nln option;                     (* newline normalization (if any). *)
+    nl : int;                            (* newline normalization character. *)
+    mutable i : string;                              (* current input chunk. *)
+    mutable i_pos : int;                          (* input current position. *)
+    mutable i_max : int;                          (* input maximal position. *)
+    t : string;        (* four bytes temporary buffer for overlapping reads. *)
+    mutable t_len : int;                      (* current byte length of [t]. *)
+    mutable t_need : int;                  (* number of bytes needed in [t]. *)
+    mutable removed_bom : bool;     (* [true] if an initial BOM was removed. *)
+    mutable last_cr : bool;                   (* [true] if last char was CR. *)
+    mutable line : int;                                      (* line number. *)
+    mutable col : int;                                     (* column number. *)
+    mutable byte_count : int;                                 (* byte count. *)
+    mutable count : int;                                      (* char count. *)
+    mutable pp :        (* decoder post-processor for BOM, position and nln. *)
+      decoder -> [ `Malformed of string | `Uchar of uchar ] -> decode;
+    mutable k : decoder -> decode }                 (* decoder continuation. *)
+
+(* On decodes that overlap two (or more) [d.i] buffers, we use [t_fill] to copy
+   the input data to [d.t] and decode from there. If the [d.i] buffers are not
+   too small this is faster than continuation based byte per byte writes.
+
+   End of input (eoi) is signalled by [d.i_pos = 0] and [d.i_max = min_int]
+   which implies that [i_rem d < 0] is [true]. *)
+
+let i_rem d = d.i_max - d.i_pos + 1     (* remaining bytes to read in [d.i]. *)
+let eoi d = d.i <- ""; d.i_pos <- 0; d.i_max <- min_int   (* set eoi in [d]. *)
+let src d s j l =                                     (* set [d.i] with [s]. *)
+  if (j < 0 || l < 0 || j + l > String.length s) then invalid_bounds j l else
+  if (l = 0) then eoi d else
+  (d.i <- s; d.i_pos <- j; d.i_max <- j + l - 1)
+
+let refill k d = match d.src with  (* get new input in [d.i] and [k]ontinue. *)
+| `Manual -> d.k <- k; `Await
+| `String _ -> eoi d; k d
+| `Channel ic ->
+    let rc = input ic d.i 0 (String.length d.i) in
+    (src d d.i 0 rc; k d)
+
+let t_need d need = d.t_len <- 0; d.t_need <- need
+let rec t_fill k d =      (* get [d.t_need] bytes (or less if eoi) in [i.t]. *)
+  let blit d l =
+    unsafe_blit d.i d.i_pos d.t d.t_len (* write pos. *) l;
+    d.i_pos <- d.i_pos + l; d.t_len <- d.t_len + l;
+  in
+  let rem = i_rem d in
+  if rem < 0 (* eoi *) then k d else
+  let need = d.t_need - d.t_len in
+  if rem < need then (blit d rem; refill (t_fill k) d) else (blit d need; k d)
+
+let ret k v byte_count d =                     (* return post-processed [v]. *)
+  d.k <- k; d.byte_count <- d.byte_count + byte_count; d.pp d v
+
+(* Decoders. *)
+
+let rec decode_us_ascii d =
+  let rem = i_rem d in
+  if rem <= 0 then (if rem < 0 then `End else refill decode_us_ascii d) else
+  let j = d.i_pos in
+  d.i_pos <- d.i_pos + 1; ret decode_us_ascii (r_us_ascii d.i j) 1 d
+
+let rec decode_iso_8859_1 d =
+  let rem = i_rem d in
+  if rem <= 0 then (if rem < 0 then `End else refill decode_iso_8859_1 d) else
+  let j = d.i_pos in
+  d.i_pos <- d.i_pos + 1; ret decode_iso_8859_1 (r_iso_8859_1 d.i j) 1 d
+
+(* UTF-8 decoder *)
+
+let rec t_decode_utf_8 d =                             (* decode from [d.t]. *)
+  if d.t_len < d.t_need
+  then ret decode_utf_8 (malformed d.t 0 d.t_len) d.t_len d
+  else ret decode_utf_8 (r_utf_8 d.t 0 d.t_len) d.t_len d
+
+and decode_utf_8 d =
+  let rem = i_rem d in
+  if rem <= 0 then (if rem < 0 then `End else refill decode_utf_8 d) else
+  let need = unsafe_array_get utf_8_len (unsafe_byte d.i d.i_pos) in
+  if rem < need then (t_need d need; t_fill t_decode_utf_8 d) else
+  let j = d.i_pos in
+  if need = 0
+  then (d.i_pos <- d.i_pos + 1; ret decode_utf_8 (malformed d.i j 1) 1 d)
+  else (d.i_pos <- d.i_pos + need; ret decode_utf_8 (r_utf_8 d.i j need) need d)
+
+(* UTF-16BE decoder *)
+
+let rec t_decode_utf_16be_lo hi d =                    (* decode from [d.t]. *)
+  let bcount = d.t_len + 2 (* hi count *) in
+  if d.t_len < d.t_need
+  then ret decode_utf_16be (malformed_pair true hi d.t 0 d.t_len) bcount d
+  else ret decode_utf_16be (r_utf_16_lo hi d.t 0 1) bcount d
+
+and t_decode_utf_16be d =                              (* decode from [d.t]. *)
+  if d.t_len < d.t_need
+  then ret decode_utf_16be (malformed d.t 0 d.t_len) d.t_len d
+  else decode_utf_16be_lo (r_utf_16 d.t 0 1) d
+
+and decode_utf_16be_lo v d = match v with
+| `Uchar _ | `Malformed _ as v -> ret decode_utf_16be v 2 d
+| `Hi hi ->
+    let rem = i_rem d in
+    if rem < 2 then (t_need d 2; t_fill (t_decode_utf_16be_lo hi) d) else
+    let j = d.i_pos in
+    d.i_pos <- d.i_pos + 2;
+    ret decode_utf_16be (r_utf_16_lo hi d.i j (j + 1)) 4 d
+
+and decode_utf_16be d =
+  let rem = i_rem d in
+  if rem <= 0 then (if rem < 0 then `End else refill decode_utf_16be d) else
+  if rem < 2 then (t_need d 2; t_fill t_decode_utf_16be d) else
+  let j = d.i_pos in
+  d.i_pos <- d.i_pos + 2; decode_utf_16be_lo (r_utf_16 d.i j (j + 1)) d
+
+(* UTF-16LE decoder, same as UTF-16BE with byte swapped. *)
+
+let rec t_decode_utf_16le_lo hi d =                    (* decode from [d.t]. *)
+  let bcount = d.t_len + 2 (* hi count *) in
+  if d.t_len < d.t_need
+  then ret decode_utf_16le (malformed_pair false hi d.t 0 d.t_len) bcount d
+  else ret decode_utf_16le (r_utf_16_lo hi d.t 1 0) bcount d
+
+and t_decode_utf_16le d =                              (* decode from [d.t]. *)
+  if d.t_len < d.t_need
+  then ret decode_utf_16le (malformed d.t 0 d.t_len) d.t_len d
+  else decode_utf_16le_lo (r_utf_16 d.t 1 0) d
+
+and decode_utf_16le_lo v d = match v with
+| `Uchar _ | `Malformed _ as v -> ret decode_utf_16le v 2 d
+| `Hi hi ->
+    let rem = i_rem d in
+    if rem < 2 then (t_need d 2; t_fill (t_decode_utf_16le_lo hi) d) else
+    let j = d.i_pos in
+    d.i_pos <- d.i_pos + 2;
+    ret decode_utf_16le (r_utf_16_lo hi d.i (j + 1) j) 4 d
+
+and decode_utf_16le d =
+  let rem = i_rem d in
+  if rem <= 0 then (if rem < 0 then `End else refill decode_utf_16le d) else
+  if rem < 2 then (t_need d 2; t_fill t_decode_utf_16le d) else
+  let j = d.i_pos in
+  d.i_pos <- d.i_pos + 2; decode_utf_16le_lo (r_utf_16 d.i (j + 1) j) d
+
+(* Encoding guessing. The guess is simple but starting the decoder
+   after is tedious, uutf's decoders are not designed to put bytes
+   back in the stream. *)
+
+let guessed_utf_8 d =                   (* start decoder after `UTF_8 guess. *)
+  let b3 d =                                 (* handles the third read byte. *)
+    let b3 = unsafe_byte d.t 2 in
+    match utf_8_len.(b3) with
+    | 0 -> ret decode_utf_8 (malformed d.t 2 1) 1 d
+    | n ->
+        d.t_need <- n; d.t_len <- 1; unsafe_set_byte d.t 0 b3;
+        t_fill t_decode_utf_8 d
+  in
+  let b2 d =                                     (* handle second read byte. *)
+    let b2 = unsafe_byte d.t 1 in
+    let b3 = if d.t_len > 2 then b3 else decode_utf_8 (* decodes `End *) in
+    match utf_8_len.(b2) with
+    | 0 -> ret b3 (malformed d.t 1 1) 1 d
+    | 1 -> ret b3 (r_utf_8 d.t 1 1) 1 d
+    | n ->                         (* copy d.t.(1-2) to d.t.(0-1) and decode *)
+        d.t_need <- n;
+        unsafe_set_byte d.t 0 b2;
+        if (d.t_len < 3) then d.t_len <- 1 else
+        (d.t_len <- 2; unsafe_set_byte d.t 1 (unsafe_byte d.t 2); );
+        t_fill t_decode_utf_8 d
+  in
+  let b1 = unsafe_byte d.t 0 in                   (* handle first read byte. *)
+  let b2 = if d.t_len > 1 then b2 else decode_utf_8 (* decodes `End *) in
+  match utf_8_len.(b1) with
+  | 0 -> ret b2 (malformed d.t 0 1) 1 d
+  | 1 -> ret b2 (r_utf_8 d.t 0 1) 1 d
+  | 2 ->
+      if d.t_len < 2 then ret decode_utf_8 (malformed d.t 0 1) 1 d else
+      if d.t_len < 3 then ret decode_utf_8 (r_utf_8 d.t 0 2) 2 d else
+      ret b3 (r_utf_8 d.t 0 2) 2 d
+  | 3 ->
+      if d.t_len < 3
+      then ret decode_utf_8 (malformed d.t 0 d.t_len) d.t_len d
+      else ret decode_utf_8 (r_utf_8 d.t 0 3) 3 d
+  | 4 ->
+      if d.t_len < 3
+      then ret decode_utf_8 (malformed d.t 0 d.t_len) d.t_len d
+      else (d.t_need <- 4; t_fill t_decode_utf_8 d)
+  | n -> assert false
+
+let guessed_utf_16 d be v =     (* start decoder after `UTF_16{BE,LE} guess. *)
+  let decode_utf_16, t_decode_utf_16, t_decode_utf_16_lo, j0, j1 =
+    if be then decode_utf_16be, t_decode_utf_16be, t_decode_utf_16be_lo, 0, 1
+    else decode_utf_16le, t_decode_utf_16le, t_decode_utf_16le_lo, 1, 0
+  in
+  let b3 k d =
+    if d.t_len < 3 then decode_utf_16 d (* decodes `End *) else
+    begin                             (* copy d.t.(2) to d.t.(0) and decode. *)
+      d.t_need <- 2; d.t_len <- 1;
+      unsafe_set_byte d.t 0 (unsafe_byte d.t 2);
+      t_fill k d
+    end
+  in
+  match v with
+  | `BOM -> ret (b3 t_decode_utf_16) (`Uchar u_bom) 2 d
+  | `ASCII u -> ret (b3 t_decode_utf_16) (`Uchar u) 2 d
+  | `Decode ->
+      match r_utf_16 d.t j0 j1 with
+      | `Malformed _ | `Uchar _ as v -> ret (b3 t_decode_utf_16) v 2 d
+      | `Hi hi ->
+        if d.t_len < 3
+        then ret decode_utf_16 (malformed_pair be hi "" 0 0) d.t_len d
+        else (b3 (t_decode_utf_16_lo hi)) d
+
+let guess_encoding d =                  (* guess encoding and start decoder. *)
+  let setup d = match r_encoding d.t 0 d.t_len with
+  | `UTF_8 r ->
+      d.encoding <- `UTF_8; d.k <- decode_utf_8;
+      begin match r with
+      | `BOM -> ret decode_utf_8 (`Uchar u_bom) 3 d
+      | `Decode -> guessed_utf_8 d
+      | `End -> `End
+      end
+  | `UTF_16BE r ->
+      d.encoding <- `UTF_16BE; d.k <- decode_utf_16be; guessed_utf_16 d true r
+  | `UTF_16LE r ->
+      d.encoding <- `UTF_16LE; d.k <- decode_utf_16le; guessed_utf_16 d false r
+
+  in
+  (t_need d 3; t_fill setup d)
+
+(* Character post-processors. Used for BOM handling, newline
+   normalization and position tracking. The [pp_remove_bom] is only
+   used for the first character to remove a possible initial BOM and
+   handle UTF-16 endianness recognition. *)
+
+let nline d = d.col <- 0; d.line <- d.line + 1                   (* inlined. *)
+let ncol d = d.col <- d.col + 1                                  (* inlined. *)
+let ncount d = d.count <- d.count + 1                            (* inlined. *)
+let cr d b = d.last_cr <- b                                      (* inlined. *)
+
+let pp_remove_bom utf16 pp d = function(* removes init. BOM, handles UTF-16. *)
+| `Uchar 0xFEFF (* BOM *) ->
+    if utf16 then (d.encoding <- `UTF_16BE; d.k <- decode_utf_16be);
+    d.removed_bom <- true; d.pp <- pp; d.k d
+| `Uchar 0xFFFE (* BOM reversed from decode_utf_16be *) when utf16 ->
+    d.encoding <- `UTF_16LE; d.k <- decode_utf_16le;
+    d.removed_bom <- true; d.pp <- pp; d.k d
+| `Malformed _ | `Uchar _ as v ->
+    d.removed_bom <- false; d.pp <- pp; d.pp d v
+
+let pp_nln_none d = function
+| `Uchar 0x000A (* LF *) as v ->
+    let last_cr = d.last_cr in
+    cr d false; ncount d; if last_cr then v else (nline d; v)
+| `Uchar 0x000D (* CR *) as v -> cr d true; ncount d; nline d; v
+| `Uchar (0x0085 | 0x000C | 0x2028 | 0x2029) (* NEL | FF | LS | PS *) as v ->
+    cr d false; ncount d; nline d; v
+| `Uchar _ | `Malformed _ as v -> cr d false; ncount d; ncol d; v
+
+let pp_nln_readline d = function
+| `Uchar 0x000A (* LF *) ->
+    let last_cr = d.last_cr in
+    cr d false; if last_cr then d.k d else (ncount d; nline d; `Uchar d.nl)
+| `Uchar 0x000D (* CR *) -> cr d true; ncount d; nline d; `Uchar d.nl
+| `Uchar (0x0085 | 0x000C | 0x2028 | 0x2029) (* NEL | FF | LS | PS *) ->
+    cr d false; ncount d; nline d; `Uchar d.nl
+| `Uchar _ | `Malformed _ as v -> cr d false; ncount d; ncol d; v
+
+let pp_nln_nlf d = function
+| `Uchar 0x000A (* LF *) ->
+    let last_cr = d.last_cr in
+    cr d false; if last_cr then d.k d else (ncount d; nline d; `Uchar d.nl)
+| `Uchar 0x000D (* CR *) -> cr d true; ncount d; nline d; `Uchar d.nl
+| `Uchar 0x0085 (* NEL *) -> cr d false; ncount d; nline d; `Uchar d.nl
+| `Uchar (0x000C | 0x2028 | 0x2029) as v (* FF | LS | PS *) ->
+    cr d false; ncount d; nline d; v
+| `Uchar _ | `Malformed _ as v -> cr d false; ncount d; ncol d; v
+
+let pp_nln_ascii d = function
+| `Uchar 0x000A (* LF *) ->
+    let last_cr = d.last_cr in
+    cr d false; if last_cr then d.k d else (ncount d; nline d; `Uchar d.nl)
+| `Uchar 0x000D (* CR *) -> cr d true; ncount d; nline d; `Uchar d.nl
+| `Uchar (0x0085 | 0x000C | 0x2028 | 0x2029) as v (* NEL | FF | LS | PS *) ->
+    cr d false; ncount d; nline d; v
+| `Uchar _ | `Malformed _ as v -> cr d false; ncount d; ncol d; v
+
+let decode_fun = function
+| `UTF_8 -> decode_utf_8
+| `UTF_16 -> decode_utf_16be                         (* see [pp_remove_bom]. *)
+| `UTF_16BE -> decode_utf_16be
+| `UTF_16LE -> decode_utf_16le
+| `US_ASCII -> decode_us_ascii
+| `ISO_8859_1 -> decode_iso_8859_1
+
+let decoder ?nln ?encoding src =
+  let pp, nl = match nln with
+  | None -> pp_nln_none, 0x000A (* not used. *)
+  | Some (`ASCII nl) -> pp_nln_ascii, nl
+  | Some (`NLF nl) -> pp_nln_nlf, nl
+  | Some (`Readline nl) -> pp_nln_readline, nl
+  in
+  let encoding, k = match encoding with
+  | None -> `UTF_8, guess_encoding
+  | Some e -> (e :> decoder_encoding), decode_fun e
+  in
+  let i, i_pos, i_max = match src with
+  | `Manual -> "", 1, 0                            (* implies src_rem d = 0. *)
+  | `Channel _ -> String.create io_buffer_size, 1, 0                (* idem. *)
+  | `String s -> s, 0, String.length s - 1
+  in
+  { src = (src :> src); encoding; nln = (nln :> nln option); nl;
+    i; i_pos; i_max; t = String.create 4; t_len = 0; t_need = 0;
+    removed_bom = false; last_cr = false; line = 1; col = 0;
+    byte_count = 0; count = 0;
+    pp = pp_remove_bom (encoding = `UTF_16) pp; k }
+
+let decode d = d.k d
+let decoder_line d = d.line
+let decoder_col d = d.col
+let decoder_byte_count d = d.byte_count
+let decoder_count d = d.count
+let decoder_removed_bom d = d.removed_bom
+let decoder_src d = d.src
+let decoder_nln d = d.nln
+let decoder_encoding d = d.encoding
+let set_decoder_encoding d e =
+  d.encoding <- (e :> decoder_encoding); d.k <- decode_fun e
+
+(* Encode *)
+
+type dst = [ `Channel of out_channel | `Buffer of Buffer.t | `Manual ]
+type encode = [ `Await | `End | `Uchar of uchar ]
+type encoder =
+  { dst : dst;                                        (* output destination. *)
+    encoding : encoding;                                (* encoded encoding. *)
+    mutable o : string;                             (* current output chunk. *)
+    mutable o_pos : int;                   (* next output position to write. *)
+    mutable o_max : int;                (* maximal output position to write. *)
+    t : string;                 (* four bytes buffer for overlapping writes. *)
+    mutable t_pos : int;                    (* next position to read in [t]. *)
+    mutable t_max : int;                 (* maximal position to read in [t]. *)
+    mutable k :                                     (* encoder continuation. *)
+      encoder -> encode -> [ `Ok | `Partial ] }
+
+(* On encodes that overlap two (or more) [e.o] buffers, we encode the
+   character to the temporary buffer [o.t] and continue with
+   [tmp_flush] to write this data on the different [e.o] buffers. If
+   the [e.o] buffers are not too small this is faster than
+   continuation based byte per byte writes. *)
+
+let o_rem e = e.o_max - e.o_pos + 1    (* remaining bytes to write in [e.o]. *)
+let dst e s j l =                                     (* set [e.o] with [s]. *)
+  if (j < 0 || l < 0 || j + l > String.length s) then invalid_bounds j l;
+  e.o <- s; e.o_pos <- j; e.o_max <- j + l - 1
+
+let partial k e = function `Await -> k e | `Uchar _ | `End -> invalid_encode ()
+let flush k e = match e.dst with(* get free storage in [d.o] and [k]ontinue. *)
+| `Manual -> e.k <- partial k; `Partial
+| `Buffer b -> Buffer.add_substring b e.o 0 e.o_pos; e.o_pos <- 0; k e
+| `Channel oc -> output oc e.o 0 e.o_pos; e.o_pos <- 0; k e
+
+let t_range e max = e.t_pos <- 0; e.t_max <- max
+let rec t_flush k e =               (* flush [d.t] up to [d.t_max] in [d.i]. *)
+  let blit e l =
+    unsafe_blit e.t e.t_pos e.o e.o_pos l;
+    e.o_pos <- e.o_pos + l; e.t_pos <- e.t_pos + l
+  in
+  let rem = o_rem e in
+  let len = e.t_max - e.t_pos + 1 in
+  if rem < len then (blit e rem; flush (t_flush k) e) else (blit e len; k e)
+
+(* Encoders. *)
+
+let rec encode_utf_8 e v =
+  let k e = e.k <- encode_utf_8; `Ok in
+  match v with
+  | `Await -> k e
+  | `End -> flush k e
+  | `Uchar u as v ->
+      let rem = o_rem e in
+      if u <= 0x007F then
+      if rem < 1 then flush (fun e -> encode_utf_8 e v) e else
+      (unsafe_set_byte e.o e.o_pos u; e.o_pos <- e.o_pos + 1; k e)
+      else if u <= 0x07FF then
+      begin
+        let s, j, k =
+          if rem < 2 then (t_range e 1; e.t, 0, t_flush k) else
+          let j = e.o_pos in (e.o_pos <- e.o_pos + 2; e.o, j, k)
+        in
+        unsafe_set_byte s j (0xC0 lor (u lsr 6));
+        unsafe_set_byte s (j + 1) (0x80 lor (u land 0x3F));
+        k e
+      end
+      else if u <= 0xFFFF then
+      begin
+        let s, j, k =
+          if rem < 3 then (t_range e 2; e.t, 0, t_flush k) else
+          let j = e.o_pos in (e.o_pos <- e.o_pos + 3; e.o, j, k)
+        in
+        unsafe_set_byte s j (0xE0 lor (u lsr 12));
+        unsafe_set_byte s (j + 1) (0x80 lor ((u lsr 6) land 0x3F));
+        unsafe_set_byte s (j + 2) (0x80 lor (u land 0x3F));
+        k e
+      end
+      else
+      begin
+        let s, j, k =
+          if rem < 4 then (t_range e 3; e.t, 0, t_flush k) else
+          let j = e.o_pos in (e.o_pos <- e.o_pos + 4; e.o, j, k)
+        in
+        unsafe_set_byte s j (0xF0 lor (u lsr 18));
+        unsafe_set_byte s (j + 1) (0x80 lor ((u lsr 12) land 0x3F));
+        unsafe_set_byte s (j + 2) (0x80 lor ((u lsr 6) land 0x3F));
+        unsafe_set_byte s (j + 3) (0x80 lor (u land 0x3F));
+        k e
+      end
+
+let rec encode_utf_16be e v =
+  let k e = e.k <- encode_utf_16be; `Ok in
+  match v with
+  | `Await -> k e
+  | `End -> flush k e
+  | `Uchar u ->
+      let rem = o_rem e in
+      if u < 0x10000 then
+      begin
+        let s, j, k =
+          if rem < 2 then (t_range e 1; e.t, 0, t_flush k) else
+          let j = e.o_pos in (e.o_pos <- e.o_pos + 2; e.o, j, k)
+        in
+        unsafe_set_byte s j (u lsr 8);
+        unsafe_set_byte s (j + 1) (u land 0xFF);
+        k e
+      end else begin
+        let s, j, k =
+          if rem < 4 then (t_range e 3; e.t, 0, t_flush k) else
+          let j = e.o_pos in (e.o_pos <- e.o_pos + 4; e.o, j, k)
+        in
+        let u' = u - 0x10000 in
+        let hi = (0xD800 lor (u' lsr 10)) in
+        let lo = (0xDC00 lor (u' land 0x3FF)) in
+        unsafe_set_byte s j (hi lsr 8);
+        unsafe_set_byte s (j + 1) (hi land 0xFF);
+        unsafe_set_byte s (j + 2) (lo lsr 8);
+        unsafe_set_byte s (j + 3) (lo land 0xFF);
+        k e
+      end
+
+let rec encode_utf_16le e v =         (* encode_uft_16be with bytes swapped. *)
+  let k e = e.k <- encode_utf_16le; `Ok in
+  match v with
+  | `Await -> k e
+  | `End -> flush k e
+  | `Uchar u ->
+    let rem = o_rem e in
+    if u < 0x10000 then
+    begin
+      let s, j, k =
+        if rem < 2 then (t_range e 1; e.t, 0, t_flush k) else
+        let j = e.o_pos in (e.o_pos <- e.o_pos + 2; e.o, j, k)
+      in
+      unsafe_set_byte s j (u land 0xFF);
+      unsafe_set_byte s (j + 1) (u lsr 8);
+      k e
+    end
+    else
+    begin
+      let s, j, k =
+        if rem < 4 then (t_range e 3; e.t, 0, t_flush k) else
+        let j = e.o_pos in (e.o_pos <- e.o_pos + 4; e.o, j, k)
+      in
+      let u' = u - 0x10000 in
+      let hi = (0xD800 lor (u' lsr 10)) in
+      let lo = (0xDC00 lor (u' land 0x3FF)) in
+      unsafe_set_byte s j (hi land 0xFF);
+      unsafe_set_byte s (j + 1) (hi lsr 8);
+      unsafe_set_byte s (j + 2) (lo land 0xFF);
+      unsafe_set_byte s (j + 3) (lo lsr 8);
+      k e
+    end
+
+let encode_fun = function
+| `UTF_8 -> encode_utf_8
+| `UTF_16 -> encode_utf_16be
+| `UTF_16BE -> encode_utf_16be
+| `UTF_16LE -> encode_utf_16le
+
+let encoder encoding dst =
+  let o, o_pos, o_max = match dst with
+  | `Manual -> "", 1, 0                              (* implies o_rem e = 0. *)
+  | `Buffer _
+  | `Channel _ -> String.create io_buffer_size, 0, io_buffer_size - 1
+  in
+  { dst = (dst :> dst); encoding = (encoding :> encoding); o; o_pos; o_max;
+    t = String.create 4; t_pos = 1; t_max = 0; k = encode_fun encoding}
+
+let encode e v = e.k e (v :> encode)
+let encoder_encoding e = e.encoding
+let encoder_dst e = e.dst
+
+(* Manual sources and destinations. *)
+
+module Manual = struct
+  let src = src
+  let dst = dst
+  let dst_rem = o_rem
+end
+
+(* Strings folders and Buffer encoders *)
+
+module String = struct
+  let encoding_guess s = match r_encoding s 0 (max (String.length s) 3) with
+  | `UTF_8 d -> `UTF_8, (d = `BOM)
+  | `UTF_16BE d -> `UTF_16BE, (d = `BOM)
+  | `UTF_16LE d -> `UTF_16LE, (d = `BOM)
+
+  type 'a folder =
+    'a -> int -> [ `Uchar of uchar | `Malformed of string ] -> 'a
+
+  let fold_utf_8 f acc s =
+    let rec loop acc f s i l =
+      if i = l then acc else
+      let need = unsafe_array_get utf_8_len (unsafe_byte s i) in
+      if need = 0 then loop (f acc i (malformed s i 1)) f s (i + 1) l else
+      let rem = l - i in
+      if rem < need then f acc i (malformed s i rem) else
+      loop (f acc i (r_utf_8 s i need)) f s (i + need) l
+    in
+    loop acc f s 0 (String.length s)
+
+  let fold_utf_16be f acc s =
+    let rec loop acc f s i l =
+      if i = l then acc else
+      let rem = l - i in
+      if rem < 2 then f acc i (malformed s i 1) else
+      match r_utf_16 s i (i + 1) with
+      | `Uchar _ | `Malformed _ as v -> loop (f acc i v) f s (i + 2) l
+      | `Hi hi ->
+          if rem < 4 then f acc i (malformed s i rem)  else
+          loop (f acc i (r_utf_16_lo hi s (i + 2) (i + 3))) f s (i + 4) l
+    in
+    loop acc f s 0 (String.length s)
+
+  let fold_utf_16le f acc s =             (* [fold_utf_16be], bytes swapped. *)
+    let rec loop acc f s i l =
+      if i = l then acc else
+      let rem = l - i in
+      if rem < 2 then f acc i (malformed s i 1) else
+      match r_utf_16 s (i + 1) i with
+      | `Uchar _ | `Malformed _ as v -> loop (f acc i v) f s (i + 2) l
+      | `Hi hi ->
+          if rem < 4 then f acc i (malformed s i rem)  else
+          loop (f acc i (r_utf_16_lo hi s (i + 3) (i + 2))) f s (i + 4) l
+    in
+    loop acc f s 0 (String.length s)
+end
+
+module Buffer = struct
+  let add_utf_8 b u =
+    let w byte = Buffer.add_char b (unsafe_chr byte) in          (* inlined. *)
+    if u <= 0x007F then
+    (w u)
+    else if u <= 0x07FF then
+    (w (0xC0 lor (u lsr 6));
+     w (0x80 lor (u land 0x3F)))
+    else if u <= 0xFFFF then
+    (w (0xE0 lor (u lsr 12));
+     w (0x80 lor ((u lsr 6) land 0x3F));
+     w (0x80 lor (u land 0x3F)))
+    else
+    (w (0xF0 lor (u lsr 18));
+     w (0x80 lor ((u lsr 12) land 0x3F));
+     w (0x80 lor ((u lsr 6) land 0x3F));
+     w (0x80 lor (u land 0x3F)))
+
+  let add_utf_16be b u =
+    let w byte = Buffer.add_char b (unsafe_chr byte) in          (* inlined. *)
+    if u < 0x10000 then (w (u lsr 8); w (u land 0xFF)) else
+    let u' = u - 0x10000 in
+    let hi = (0xD800 lor (u' lsr 10)) in
+    let lo = (0xDC00 lor (u' land 0x3FF)) in
+    w (hi lsr 8); w (hi land 0xFF);
+    w (lo lsr 8); w (lo land 0xFF)
+
+  let add_utf_16le b u =                            (* swapped add_utf_16be. *)
+    let w byte = Buffer.add_char b (unsafe_chr byte) in          (* inlined. *)
+    if u < 0x10000 then (w (u land 0xFF); w (u lsr 8)) else
+    let u' = u - 0x10000 in
+    let hi = (0xD800 lor (u' lsr 10)) in
+    let lo = (0xDC00 lor (u' land 0x3FF)) in
+    w (hi land 0xFF); w (hi lsr 8);
+    w (lo land 0xFF); w (lo lsr 8)
+end
+
+(*---------------------------------------------------------------------------
+   Copyright 2012 Daniel C. Bünzli
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   1. Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+
+   3. Neither the name of Daniel C. Bünzli nor the names of
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+  ---------------------------------------------------------------------------*)