mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-28 17:20:22 +00:00
@@ -261,5 +261,14 @@ module Linguist
|
||||
Language["Makefile"]
|
||||
end
|
||||
end
|
||||
|
||||
disambiguate "OCaml", "Standard ML" do |data|
|
||||
if /module|let rec |match\s+(\S+\s)+with/.match(data)
|
||||
Language["OCaml"]
|
||||
elsif /=> |case\s+(\S+\s)+of/.match(data)
|
||||
Language["Standard ML"]
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
1344
samples/OCaml/cmdliner.ml
Normal file
1344
samples/OCaml/cmdliner.ml
Normal file
File diff suppressed because it is too large
Load Diff
14
samples/OCaml/common.ml
Normal file
14
samples/OCaml/common.ml
Normal file
@@ -0,0 +1,14 @@
|
||||
(*
|
||||
* Copyright (c) 2013 Jeremy Yallop.
|
||||
*
|
||||
* This file is distributed under the terms of the MIT License.
|
||||
* See the file LICENSE for details.
|
||||
*)
|
||||
|
||||
let string_of format v =
|
||||
let buf = Buffer.create 100 in
|
||||
let fmt = Format.formatter_of_buffer buf in begin
|
||||
format fmt v;
|
||||
Format.pp_print_flush fmt ();
|
||||
Buffer.contents buf
|
||||
end
|
||||
40
samples/OCaml/date.ml
Normal file
40
samples/OCaml/date.ml
Normal file
@@ -0,0 +1,40 @@
|
||||
(*
|
||||
* Copyright (c) 2013 Jeremy Yallop.
|
||||
*
|
||||
* This file is distributed under the terms of the MIT License.
|
||||
* See the file LICENSE for details.
|
||||
*)
|
||||
|
||||
open Ctypes
|
||||
open PosixTypes
|
||||
open Foreign
|
||||
|
||||
type tm
|
||||
let tm = structure "tm"
|
||||
let (-:) ty label = field tm label ty
|
||||
let tm_sec = int -: "tm_sec" (* seconds *)
|
||||
let tm_min = int -: "tm_min" (* minutes *)
|
||||
let tm_hour = int -: "tm_hour" (* hours *)
|
||||
let tm_mday = int -: "tm_mday" (* day of the month *)
|
||||
let tm_mon = int -: "tm_mon" (* month *)
|
||||
let tm_year = int -: "tm_year" (* year *)
|
||||
let tm_wday = int -: "tm_wday" (* day of the week *)
|
||||
let tm_yday = int -: "tm_yday" (* day in the year *)
|
||||
let tm_isdst = int -: "tm_isdst" (* daylight saving time *)
|
||||
let () = seal (tm : tm structure typ)
|
||||
|
||||
let time = foreign "time" ~check_errno:true (ptr time_t @-> returning time_t)
|
||||
|
||||
let asctime = foreign "asctime" (ptr tm @-> returning string)
|
||||
|
||||
let localtime = foreign "localtime" (ptr time_t @-> returning (ptr tm))
|
||||
|
||||
let () = begin
|
||||
let timep = allocate_n ~count:1 time_t in
|
||||
let time = time timep in
|
||||
assert (time = !@timep);
|
||||
let tm = localtime timep in
|
||||
Printf.printf "tm.tm_mon = %d\n" (getf !@tm tm_mon);
|
||||
Printf.printf "tm.tm_year = %d\n" (getf !@tm tm_year);
|
||||
print_endline (asctime tm)
|
||||
end
|
||||
337
samples/OCaml/map.ml
Normal file
337
samples/OCaml/map.ml
Normal file
@@ -0,0 +1,337 @@
|
||||
(***********************************************************************)
|
||||
(* *)
|
||||
(* OCaml *)
|
||||
(* *)
|
||||
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
|
||||
(* *)
|
||||
(* Copyright 1996 Institut National de Recherche en Informatique et *)
|
||||
(* en Automatique. All rights reserved. This file is distributed *)
|
||||
(* under the terms of the GNU Library General Public License, with *)
|
||||
(* the special exception on linking described in file ../LICENSE. *)
|
||||
(* *)
|
||||
(***********************************************************************)
|
||||
|
||||
module type OrderedType =
|
||||
sig
|
||||
type t
|
||||
val compare: t -> t -> int
|
||||
end
|
||||
|
||||
module type S =
|
||||
sig
|
||||
type key
|
||||
type +'a t
|
||||
val empty: 'a t
|
||||
val is_empty: 'a t -> bool
|
||||
val mem: key -> 'a t -> bool
|
||||
val add: key -> 'a -> 'a t -> 'a t
|
||||
val singleton: key -> 'a -> 'a t
|
||||
val remove: key -> 'a t -> 'a t
|
||||
val merge:
|
||||
(key -> 'a option -> 'b option -> 'c option) -> 'a t -> 'b t -> 'c t
|
||||
val compare: ('a -> 'a -> int) -> 'a t -> 'a t -> int
|
||||
val equal: ('a -> 'a -> bool) -> 'a t -> 'a t -> bool
|
||||
val iter: (key -> 'a -> unit) -> 'a t -> unit
|
||||
val fold: (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b
|
||||
val for_all: (key -> 'a -> bool) -> 'a t -> bool
|
||||
val exists: (key -> 'a -> bool) -> 'a t -> bool
|
||||
val filter: (key -> 'a -> bool) -> 'a t -> 'a t
|
||||
val partition: (key -> 'a -> bool) -> 'a t -> 'a t * 'a t
|
||||
val cardinal: 'a t -> int
|
||||
val bindings: 'a t -> (key * 'a) list
|
||||
val min_binding: 'a t -> (key * 'a)
|
||||
val max_binding: 'a t -> (key * 'a)
|
||||
val choose: 'a t -> (key * 'a)
|
||||
val split: key -> 'a t -> 'a t * 'a option * 'a t
|
||||
val find: key -> 'a t -> 'a
|
||||
val map: ('a -> 'b) -> 'a t -> 'b t
|
||||
val mapi: (key -> 'a -> 'b) -> 'a t -> 'b t
|
||||
end
|
||||
|
||||
module Make(Ord: OrderedType) = struct
|
||||
|
||||
type key = Ord.t
|
||||
|
||||
type 'a t =
|
||||
Empty
|
||||
| Node of 'a t * key * 'a * 'a t * int
|
||||
|
||||
let height = function
|
||||
Empty -> 0
|
||||
| Node(_,_,_,_,h) -> h
|
||||
|
||||
let create l x d r =
|
||||
let hl = height l and hr = height r in
|
||||
Node(l, x, d, r, (if hl >= hr then hl + 1 else hr + 1))
|
||||
|
||||
let singleton x d = Node(Empty, x, d, Empty, 1)
|
||||
|
||||
let bal l x d r =
|
||||
let hl = match l with Empty -> 0 | Node(_,_,_,_,h) -> h in
|
||||
let hr = match r with Empty -> 0 | Node(_,_,_,_,h) -> h in
|
||||
if hl > hr + 2 then begin
|
||||
match l with
|
||||
Empty -> invalid_arg "Map.bal"
|
||||
| Node(ll, lv, ld, lr, _) ->
|
||||
if height ll >= height lr then
|
||||
create ll lv ld (create lr x d r)
|
||||
else begin
|
||||
match lr with
|
||||
Empty -> invalid_arg "Map.bal"
|
||||
| Node(lrl, lrv, lrd, lrr, _)->
|
||||
create (create ll lv ld lrl) lrv lrd (create lrr x d r)
|
||||
end
|
||||
end else if hr > hl + 2 then begin
|
||||
match r with
|
||||
Empty -> invalid_arg "Map.bal"
|
||||
| Node(rl, rv, rd, rr, _) ->
|
||||
if height rr >= height rl then
|
||||
create (create l x d rl) rv rd rr
|
||||
else begin
|
||||
match rl with
|
||||
Empty -> invalid_arg "Map.bal"
|
||||
| Node(rll, rlv, rld, rlr, _) ->
|
||||
create (create l x d rll) rlv rld (create rlr rv rd rr)
|
||||
end
|
||||
end else
|
||||
Node(l, x, d, r, (if hl >= hr then hl + 1 else hr + 1))
|
||||
|
||||
let empty = Empty
|
||||
|
||||
let is_empty = function Empty -> true | _ -> false
|
||||
|
||||
let rec add x data = function
|
||||
Empty ->
|
||||
Node(Empty, x, data, Empty, 1)
|
||||
| Node(l, v, d, r, h) ->
|
||||
let c = Ord.compare x v in
|
||||
if c = 0 then
|
||||
Node(l, x, data, r, h)
|
||||
else if c < 0 then
|
||||
bal (add x data l) v d r
|
||||
else
|
||||
bal l v d (add x data r)
|
||||
|
||||
let rec find x = function
|
||||
Empty ->
|
||||
raise Not_found
|
||||
| Node(l, v, d, r, _) ->
|
||||
let c = Ord.compare x v in
|
||||
if c = 0 then d
|
||||
else find x (if c < 0 then l else r)
|
||||
|
||||
let rec mem x = function
|
||||
Empty ->
|
||||
false
|
||||
| Node(l, v, d, r, _) ->
|
||||
let c = Ord.compare x v in
|
||||
c = 0 || mem x (if c < 0 then l else r)
|
||||
|
||||
let rec min_binding = function
|
||||
Empty -> raise Not_found
|
||||
| Node(Empty, x, d, r, _) -> (x, d)
|
||||
| Node(l, x, d, r, _) -> min_binding l
|
||||
|
||||
let rec max_binding = function
|
||||
Empty -> raise Not_found
|
||||
| Node(l, x, d, Empty, _) -> (x, d)
|
||||
| Node(l, x, d, r, _) -> max_binding r
|
||||
|
||||
let rec remove_min_binding = function
|
||||
Empty -> invalid_arg "Map.remove_min_elt"
|
||||
| Node(Empty, x, d, r, _) -> r
|
||||
| Node(l, x, d, r, _) -> bal (remove_min_binding l) x d r
|
||||
|
||||
let merge t1 t2 =
|
||||
match (t1, t2) with
|
||||
(Empty, t) -> t
|
||||
| (t, Empty) -> t
|
||||
| (_, _) ->
|
||||
let (x, d) = min_binding t2 in
|
||||
bal t1 x d (remove_min_binding t2)
|
||||
|
||||
let rec remove x = function
|
||||
Empty ->
|
||||
Empty
|
||||
| Node(l, v, d, r, h) ->
|
||||
let c = Ord.compare x v in
|
||||
if c = 0 then
|
||||
merge l r
|
||||
else if c < 0 then
|
||||
bal (remove x l) v d r
|
||||
else
|
||||
bal l v d (remove x r)
|
||||
|
||||
let rec iter f = function
|
||||
Empty -> ()
|
||||
| Node(l, v, d, r, _) ->
|
||||
iter f l; f v d; iter f r
|
||||
|
||||
let rec map f = function
|
||||
Empty ->
|
||||
Empty
|
||||
| Node(l, v, d, r, h) ->
|
||||
let l' = map f l in
|
||||
let d' = f d in
|
||||
let r' = map f r in
|
||||
Node(l', v, d', r', h)
|
||||
|
||||
let rec mapi f = function
|
||||
Empty ->
|
||||
Empty
|
||||
| Node(l, v, d, r, h) ->
|
||||
let l' = mapi f l in
|
||||
let d' = f v d in
|
||||
let r' = mapi f r in
|
||||
Node(l', v, d', r', h)
|
||||
|
||||
let rec fold f m accu =
|
||||
match m with
|
||||
Empty -> accu
|
||||
| Node(l, v, d, r, _) ->
|
||||
fold f r (f v d (fold f l accu))
|
||||
|
||||
let rec for_all p = function
|
||||
Empty -> true
|
||||
| Node(l, v, d, r, _) -> p v d && for_all p l && for_all p r
|
||||
|
||||
let rec exists p = function
|
||||
Empty -> false
|
||||
| Node(l, v, d, r, _) -> p v d || exists p l || exists p r
|
||||
|
||||
(* Beware: those two functions assume that the added k is *strictly*
|
||||
smaller (or bigger) than all the present keys in the tree; it
|
||||
does not test for equality with the current min (or max) key.
|
||||
|
||||
Indeed, they are only used during the "join" operation which
|
||||
respects this precondition.
|
||||
*)
|
||||
|
||||
let rec add_min_binding k v = function
|
||||
| Empty -> singleton k v
|
||||
| Node (l, x, d, r, h) ->
|
||||
bal (add_min_binding k v l) x d r
|
||||
|
||||
let rec add_max_binding k v = function
|
||||
| Empty -> singleton k v
|
||||
| Node (l, x, d, r, h) ->
|
||||
bal l x d (add_max_binding k v r)
|
||||
|
||||
(* Same as create and bal, but no assumptions are made on the
|
||||
relative heights of l and r. *)
|
||||
|
||||
let rec join l v d r =
|
||||
match (l, r) with
|
||||
(Empty, _) -> add_min_binding v d r
|
||||
| (_, Empty) -> add_max_binding v d l
|
||||
| (Node(ll, lv, ld, lr, lh), Node(rl, rv, rd, rr, rh)) ->
|
||||
if lh > rh + 2 then bal ll lv ld (join lr v d r) else
|
||||
if rh > lh + 2 then bal (join l v d rl) rv rd rr else
|
||||
create l v d r
|
||||
|
||||
(* Merge two trees l and r into one.
|
||||
All elements of l must precede the elements of r.
|
||||
No assumption on the heights of l and r. *)
|
||||
|
||||
let concat t1 t2 =
|
||||
match (t1, t2) with
|
||||
(Empty, t) -> t
|
||||
| (t, Empty) -> t
|
||||
| (_, _) ->
|
||||
let (x, d) = min_binding t2 in
|
||||
join t1 x d (remove_min_binding t2)
|
||||
|
||||
let concat_or_join t1 v d t2 =
|
||||
match d with
|
||||
| Some d -> join t1 v d t2
|
||||
| None -> concat t1 t2
|
||||
|
||||
let rec split x = function
|
||||
Empty ->
|
||||
(Empty, None, Empty)
|
||||
| Node(l, v, d, r, _) ->
|
||||
let c = Ord.compare x v in
|
||||
if c = 0 then (l, Some d, r)
|
||||
else if c < 0 then
|
||||
let (ll, pres, rl) = split x l in (ll, pres, join rl v d r)
|
||||
else
|
||||
let (lr, pres, rr) = split x r in (join l v d lr, pres, rr)
|
||||
|
||||
let rec merge f s1 s2 =
|
||||
match (s1, s2) with
|
||||
(Empty, Empty) -> Empty
|
||||
| (Node (l1, v1, d1, r1, h1), _) when h1 >= height s2 ->
|
||||
let (l2, d2, r2) = split v1 s2 in
|
||||
concat_or_join (merge f l1 l2) v1 (f v1 (Some d1) d2) (merge f r1 r2)
|
||||
| (_, Node (l2, v2, d2, r2, h2)) ->
|
||||
let (l1, d1, r1) = split v2 s1 in
|
||||
concat_or_join (merge f l1 l2) v2 (f v2 d1 (Some d2)) (merge f r1 r2)
|
||||
| _ ->
|
||||
assert false
|
||||
|
||||
let rec filter p = function
|
||||
Empty -> Empty
|
||||
| Node(l, v, d, r, _) ->
|
||||
(* call [p] in the expected left-to-right order *)
|
||||
let l' = filter p l in
|
||||
let pvd = p v d in
|
||||
let r' = filter p r in
|
||||
if pvd then join l' v d r' else concat l' r'
|
||||
|
||||
let rec partition p = function
|
||||
Empty -> (Empty, Empty)
|
||||
| Node(l, v, d, r, _) ->
|
||||
(* call [p] in the expected left-to-right order *)
|
||||
let (lt, lf) = partition p l in
|
||||
let pvd = p v d in
|
||||
let (rt, rf) = partition p r in
|
||||
if pvd
|
||||
then (join lt v d rt, concat lf rf)
|
||||
else (concat lt rt, join lf v d rf)
|
||||
|
||||
type 'a enumeration = End | More of key * 'a * 'a t * 'a enumeration
|
||||
|
||||
let rec cons_enum m e =
|
||||
match m with
|
||||
Empty -> e
|
||||
| Node(l, v, d, r, _) -> cons_enum l (More(v, d, r, e))
|
||||
|
||||
let compare cmp m1 m2 =
|
||||
let rec compare_aux e1 e2 =
|
||||
match (e1, e2) with
|
||||
(End, End) -> 0
|
||||
| (End, _) -> -1
|
||||
| (_, End) -> 1
|
||||
| (More(v1, d1, r1, e1), More(v2, d2, r2, e2)) ->
|
||||
let c = Ord.compare v1 v2 in
|
||||
if c <> 0 then c else
|
||||
let c = cmp d1 d2 in
|
||||
if c <> 0 then c else
|
||||
compare_aux (cons_enum r1 e1) (cons_enum r2 e2)
|
||||
in compare_aux (cons_enum m1 End) (cons_enum m2 End)
|
||||
|
||||
let equal cmp m1 m2 =
|
||||
let rec equal_aux e1 e2 =
|
||||
match (e1, e2) with
|
||||
(End, End) -> true
|
||||
| (End, _) -> false
|
||||
| (_, End) -> false
|
||||
| (More(v1, d1, r1, e1), More(v2, d2, r2, e2)) ->
|
||||
Ord.compare v1 v2 = 0 && cmp d1 d2 &&
|
||||
equal_aux (cons_enum r1 e1) (cons_enum r2 e2)
|
||||
in equal_aux (cons_enum m1 End) (cons_enum m2 End)
|
||||
|
||||
let rec cardinal = function
|
||||
Empty -> 0
|
||||
| Node(l, _, _, r, _) -> cardinal l + 1 + cardinal r
|
||||
|
||||
let rec bindings_aux accu = function
|
||||
Empty -> accu
|
||||
| Node(l, v, d, r, _) -> bindings_aux ((v, d) :: bindings_aux accu r) l
|
||||
|
||||
let bindings s =
|
||||
bindings_aux [] s
|
||||
|
||||
let choose = min_binding
|
||||
|
||||
end
|
||||
2503
samples/OCaml/mirage.ml
Normal file
2503
samples/OCaml/mirage.ml
Normal file
File diff suppressed because it is too large
Load Diff
125
samples/OCaml/reload.ml
Normal file
125
samples/OCaml/reload.ml
Normal file
@@ -0,0 +1,125 @@
|
||||
(***********************************************************************)
|
||||
(* *)
|
||||
(* OCaml *)
|
||||
(* *)
|
||||
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
|
||||
(* *)
|
||||
(* Copyright 2000 Institut National de Recherche en Informatique et *)
|
||||
(* en Automatique. All rights reserved. This file is distributed *)
|
||||
(* under the terms of the Q Public License version 1.0. *)
|
||||
(* *)
|
||||
(***********************************************************************)
|
||||
|
||||
open Cmm
|
||||
open Arch
|
||||
open Reg
|
||||
open Mach
|
||||
|
||||
(* Reloading for the AMD64 *)
|
||||
|
||||
(* Summary of instruction set constraints:
|
||||
"S" means either stack or register, "R" means register only.
|
||||
Operation Res Arg1 Arg2
|
||||
Imove R S
|
||||
or S R
|
||||
Iconst_int S if 32-bit signed, R otherwise
|
||||
Iconst_float R
|
||||
Iconst_symbol (not PIC) S
|
||||
Iconst_symbol (PIC) R
|
||||
Icall_ind R
|
||||
Itailcall_ind R
|
||||
Iload R R R
|
||||
Istore R R
|
||||
Iintop(Icomp) R R S
|
||||
or S S R
|
||||
Iintop(Imul|Idiv|mod) R R S
|
||||
Iintop(shift) S S R
|
||||
Iintop(others) R R S
|
||||
or S S R
|
||||
Iintop_imm(Iadd, n)/lea R R
|
||||
Iintop_imm(others) S S
|
||||
Inegf...Idivf R R S
|
||||
Ifloatofint R S
|
||||
Iintoffloat R S
|
||||
Ispecific(Ilea) R R R
|
||||
Ispecific(Ifloatarithmem) R R R
|
||||
|
||||
Conditional branches:
|
||||
Iinttest S R
|
||||
or R S
|
||||
Ifloattest R S (or S R if swapped test)
|
||||
other tests S
|
||||
*)
|
||||
|
||||
let stackp r =
|
||||
match r.loc with
|
||||
Stack _ -> true
|
||||
| _ -> false
|
||||
|
||||
class reload = object (self)
|
||||
|
||||
inherit Reloadgen.reload_generic as super
|
||||
|
||||
method! reload_operation op arg res =
|
||||
match op with
|
||||
| Iintop(Iadd|Isub|Iand|Ior|Ixor|Icomp _|Icheckbound) ->
|
||||
(* One of the two arguments can reside in the stack, but not both *)
|
||||
if stackp arg.(0) && stackp arg.(1)
|
||||
then ([|arg.(0); self#makereg arg.(1)|], res)
|
||||
else (arg, res)
|
||||
| Iintop_imm(Iadd, _) when arg.(0).loc <> res.(0).loc ->
|
||||
(* This add will be turned into a lea; args and results must be
|
||||
in registers *)
|
||||
super#reload_operation op arg res
|
||||
| Iintop(Idiv | Imod | Ilsl | Ilsr | Iasr)
|
||||
| Iintop_imm(_, _) ->
|
||||
(* The argument(s) and results can be either in register or on stack *)
|
||||
(* Note: Idiv, Imod: arg(0) and res(0) already forced in regs
|
||||
Ilsl, Ilsr, Iasr: arg(1) already forced in regs *)
|
||||
(arg, res)
|
||||
| Iintop(Imul) | Iaddf | Isubf | Imulf | Idivf ->
|
||||
(* First argument (= result) must be in register, second arg
|
||||
can reside in the stack *)
|
||||
if stackp arg.(0)
|
||||
then (let r = self#makereg arg.(0) in ([|r; arg.(1)|], [|r|]))
|
||||
else (arg, res)
|
||||
| Ifloatofint | Iintoffloat ->
|
||||
(* Result must be in register, but argument can be on stack *)
|
||||
(arg, (if stackp res.(0) then [| self#makereg res.(0) |] else res))
|
||||
| Iconst_int n ->
|
||||
if n <= 0x7FFFFFFFn && n >= -0x80000000n
|
||||
then (arg, res)
|
||||
else super#reload_operation op arg res
|
||||
| Iconst_symbol _ ->
|
||||
if !pic_code || !Clflags.dlcode
|
||||
then super#reload_operation op arg res
|
||||
else (arg, res)
|
||||
| _ -> (* Other operations: all args and results in registers *)
|
||||
super#reload_operation op arg res
|
||||
|
||||
method! reload_test tst arg =
|
||||
match tst with
|
||||
Iinttest cmp ->
|
||||
(* One of the two arguments can reside on stack *)
|
||||
if stackp arg.(0) && stackp arg.(1)
|
||||
then [| self#makereg arg.(0); arg.(1) |]
|
||||
else arg
|
||||
| Ifloattest((Clt|Cle), _) ->
|
||||
(* Cf. emit.mlp: we swap arguments in this case *)
|
||||
(* First argument can be on stack, second must be in register *)
|
||||
if stackp arg.(1)
|
||||
then [| arg.(0); self#makereg arg.(1) |]
|
||||
else arg
|
||||
| Ifloattest((Ceq|Cne|Cgt|Cge), _) ->
|
||||
(* Second argument can be on stack, first must be in register *)
|
||||
if stackp arg.(0)
|
||||
then [| self#makereg arg.(0); arg.(1) |]
|
||||
else arg
|
||||
| _ ->
|
||||
(* The argument(s) can be either in register or on stack *)
|
||||
arg
|
||||
|
||||
end
|
||||
|
||||
let fundecl f =
|
||||
(new reload)#fundecl f
|
||||
70
samples/OCaml/sigset.ml
Normal file
70
samples/OCaml/sigset.ml
Normal file
@@ -0,0 +1,70 @@
|
||||
(*
|
||||
* Copyright (c) 2013 Jeremy Yallop.
|
||||
*
|
||||
* This file is distributed under the terms of the MIT License.
|
||||
* See the file LICENSE for details.
|
||||
*)
|
||||
|
||||
open PosixTypes
|
||||
open Ctypes
|
||||
open Foreign
|
||||
|
||||
type t = sigset_t ptr
|
||||
|
||||
let t = ptr sigset_t
|
||||
|
||||
(* This function initializes the signal set set to exclude all of the defined
|
||||
signals. It always returns 0. *)
|
||||
let sigemptyset = foreign "sigemptyset" (ptr sigset_t @-> returning int)
|
||||
|
||||
let empty () =
|
||||
let setp = allocate_n ~count:1 sigset_t in begin
|
||||
ignore (sigemptyset setp);
|
||||
setp
|
||||
end
|
||||
|
||||
(* This function initializes the signal set set to include all of the defined
|
||||
signals. Again, the return value is 0. *)
|
||||
let sigfillset = foreign "sigfillset" (ptr sigset_t @-> returning int)
|
||||
|
||||
let full () =
|
||||
let setp = allocate_n ~count:1 sigset_t in begin
|
||||
ignore (sigfillset setp);
|
||||
setp
|
||||
end
|
||||
|
||||
(* This function adds the signal signum to the signal set set. All sigaddset
|
||||
does is modify set; it does not block or unblock any signals.
|
||||
|
||||
The return value is 0 on success and -1 on failure. The following errno
|
||||
error condition is defined for this function:
|
||||
|
||||
EINVAL The signum argument doesn't specify a valid signal.
|
||||
*)
|
||||
let sigaddset = foreign "sigaddset" ~check_errno:true
|
||||
(ptr sigset_t @-> int @-> returning int)
|
||||
|
||||
let add set signal = ignore (sigaddset set signal)
|
||||
|
||||
(* This function removes the signal signum from the signal set set. All
|
||||
sigdelset does is modify set; it does not block or unblock any signals.
|
||||
|
||||
The return value and error conditions are the same as for
|
||||
sigaddset. *)
|
||||
let sigdelset = foreign "sigdelset" ~check_errno:true
|
||||
(ptr sigset_t @-> int @-> returning int)
|
||||
|
||||
let del set signal = ignore (sigdelset set signal)
|
||||
|
||||
(* The sigismember function tests whether the signal signum is a member of the
|
||||
signal set set. It returns 1 if the signal is in the set, 0 if not, and -1 if
|
||||
there is an error.
|
||||
|
||||
The following errno error condition is defined for this function:
|
||||
|
||||
EINVAL The signum argument doesn't specify a valid signal.
|
||||
*)
|
||||
let sigismember = foreign "sigismember" ~check_errno:true
|
||||
(ptr sigset_t @-> int @-> returning int)
|
||||
|
||||
let mem set signal = sigismember set signal <> 0
|
||||
810
samples/OCaml/uutf.ml
Normal file
810
samples/OCaml/uutf.ml
Normal file
@@ -0,0 +1,810 @@
|
||||
(*---------------------------------------------------------------------------
|
||||
Copyright 2012 Daniel C. Bünzli. All rights reserved.
|
||||
Distributed under the BSD3 license, see license at the end of the file.
|
||||
%%NAME%% release %%VERSION%%
|
||||
---------------------------------------------------------------------------*)
|
||||
|
||||
let io_buffer_size = 65536 (* IO_BUFFER_SIZE 4.0.0 *)
|
||||
|
||||
let pp = Format.fprintf
|
||||
let invalid_encode () = invalid_arg "expected `Await encode"
|
||||
let invalid_bounds j l =
|
||||
invalid_arg (Printf.sprintf "invalid bounds (index %d, length %d)" j l)
|
||||
|
||||
(* Unsafe string byte manipulations. If you don't believe the author's
|
||||
invariants, replacing with safe versions makes everything safe in
|
||||
the module. He won't be upset. *)
|
||||
|
||||
let unsafe_chr = Char.unsafe_chr
|
||||
let unsafe_blit = String.unsafe_blit
|
||||
let unsafe_array_get = Array.unsafe_get
|
||||
let unsafe_byte s j = Char.code (String.unsafe_get s j)
|
||||
let unsafe_set_byte s j byte = String.unsafe_set s j (Char.unsafe_chr byte)
|
||||
|
||||
(* Unicode characters *)
|
||||
|
||||
type uchar = int
|
||||
let u_bom = 0xFEFF (* BOM. *)
|
||||
let u_rep = 0xFFFD (* replacement character. *)
|
||||
let is_uchar cp =
|
||||
(0x0000 <= cp && cp <= 0xD7FF) || (0xE000 <= cp && cp <= 0x10FFFF)
|
||||
|
||||
let pp_cp ppf cp =
|
||||
if cp < 0 || cp > 0x10FFFF then pp ppf "U+Invalid(%X)" cp else
|
||||
if cp <= 0xFFFF then pp ppf "U+%04X" cp else
|
||||
pp ppf "U+%X" cp
|
||||
|
||||
let cp_to_string cp = (* NOT thread safe. *)
|
||||
pp Format.str_formatter "%a" pp_cp cp; Format.flush_str_formatter ()
|
||||
|
||||
(* Unicode encoding schemes *)
|
||||
|
||||
type encoding = [ `UTF_8 | `UTF_16 | `UTF_16BE | `UTF_16LE ]
|
||||
type decoder_encoding = [ encoding | `US_ASCII | `ISO_8859_1 ]
|
||||
|
||||
let encoding_of_string s = match String.uppercase s with (* IANA names. *)
|
||||
| "UTF-8" -> Some `UTF_8
|
||||
| "UTF-16" -> Some `UTF_16
|
||||
| "UTF-16LE" -> Some `UTF_16LE
|
||||
| "UTF-16BE" -> Some `UTF_16BE
|
||||
| "ANSI_X3.4-1968" | "ISO-IR-6" | "ANSI_X3.4-1986" | "ISO_646.IRV:1991"
|
||||
| "ASCII" | "ISO646-US" | "US-ASCII" | "US" | "IBM367" | "CP367" | "CSASCII" ->
|
||||
Some `US_ASCII
|
||||
| "ISO_8859-1:1987" | "ISO-IR-100" | "ISO_8859-1" | "ISO-8859-1"
|
||||
| "LATIN1" | "L1" | "IBM819" | "CP819" | "CSISOLATIN1" ->
|
||||
Some `ISO_8859_1
|
||||
| _ -> None
|
||||
|
||||
let encoding_to_string = function
|
||||
| `UTF_8 -> "UTF-8" | `UTF_16 -> "UTF-16" | `UTF_16BE -> "UTF-16BE"
|
||||
| `UTF_16LE -> "UTF-16LE" | `US_ASCII -> "US-ASCII"
|
||||
| `ISO_8859_1 -> "ISO-8859-1"
|
||||
|
||||
(* Base character decoders. They assume enough data. *)
|
||||
|
||||
let malformed s j l = `Malformed (String.sub s j l)
|
||||
let malformed_pair be hi s j l = (* missing or half low surrogate at eoi. *)
|
||||
let bs1 = String.sub s j l in
|
||||
let bs0 = String.create 2 in
|
||||
let j0, j1 = if be then (0, 1) else (1, 0) in
|
||||
unsafe_set_byte bs0 j0 (hi lsr 8);
|
||||
unsafe_set_byte bs0 j1 (hi land 0xFF);
|
||||
`Malformed (bs0 ^ bs1)
|
||||
|
||||
let r_us_ascii s j =
|
||||
(* assert (0 <= j && j < String.length s); *)
|
||||
let b0 = unsafe_byte s j in
|
||||
if b0 <= 127 then `Uchar b0 else malformed s j 1
|
||||
|
||||
let r_iso_8859_1 s j =
|
||||
(* assert (0 <= j && j < String.length s); *)
|
||||
`Uchar (unsafe_byte s j)
|
||||
|
||||
let utf_8_len = [| (* uchar byte length according to first UTF-8 byte. *)
|
||||
1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1;
|
||||
1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1;
|
||||
1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1;
|
||||
1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1;
|
||||
1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1;
|
||||
1; 1; 1; 1; 1; 1; 1; 1; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
|
||||
0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
|
||||
0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
|
||||
0; 0; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2;
|
||||
2; 2; 2; 2; 2; 2; 2; 2; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3;
|
||||
4; 4; 4; 4; 4; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0 |]
|
||||
|
||||
let r_utf_8 s j l =
|
||||
(* assert (0 <= j && 0 <= l && j + l <= String.length s); *)
|
||||
match l with
|
||||
| 1 -> `Uchar (unsafe_byte s j)
|
||||
| 2 ->
|
||||
let b0 = unsafe_byte s j in let b1 = unsafe_byte s (j + 1) in
|
||||
if b1 lsr 6 != 0b10 then malformed s j l else
|
||||
`Uchar (((b0 land 0x1F) lsl 6) lor (b1 land 0x3F))
|
||||
| 3 ->
|
||||
let b0 = unsafe_byte s j in let b1 = unsafe_byte s (j + 1) in
|
||||
let b2 = unsafe_byte s (j + 2) in
|
||||
let c = `Uchar (((b0 land 0x0F) lsl 12) lor
|
||||
((b1 land 0x3F) lsl 6) lor
|
||||
(b2 land 0x3F))
|
||||
in
|
||||
if b2 lsr 6 != 0b10 then malformed s j l else
|
||||
begin match b0 with
|
||||
| 0xE0 -> if b1 < 0xA0 || 0xBF < b1 then malformed s j l else c
|
||||
| 0xED -> if b1 < 0x80 || 0x9F < b1 then malformed s j l else c
|
||||
| _ -> if b1 lsr 6 != 0b10 then malformed s j l else c
|
||||
end
|
||||
| 4 ->
|
||||
let b0 = unsafe_byte s j in let b1 = unsafe_byte s (j + 1) in
|
||||
let b2 = unsafe_byte s (j + 2) in let b3 = unsafe_byte s (j + 3) in
|
||||
let c = `Uchar (((b0 land 0x07) lsl 18) lor
|
||||
((b1 land 0x3F) lsl 12) lor
|
||||
((b2 land 0x3F) lsl 6) lor
|
||||
(b3 land 0x3F))
|
||||
in
|
||||
if b3 lsr 6 != 0b10 || b2 lsr 6 != 0b10 then malformed s j l else
|
||||
begin match b0 with
|
||||
| 0xF0 -> if b1 < 0x90 || 0xBF < b1 then malformed s j l else c
|
||||
| 0xF4 -> if b1 < 0x80 || 0x8F < b1 then malformed s j l else c
|
||||
| _ -> if b1 lsr 6 != 0b10 then malformed s j l else c
|
||||
end
|
||||
| _ -> assert false
|
||||
|
||||
let r_utf_16 s j0 j1 = (* May return a high surrogate. *)
|
||||
(* assert (0 <= j0 && 0 <= j1 && max j0 j1 < String.length s); *)
|
||||
let b0 = unsafe_byte s j0 in let b1 = unsafe_byte s j1 in
|
||||
let u = (b0 lsl 8) lor b1 in
|
||||
if u < 0xD800 || u > 0xDFFF then `Uchar u else
|
||||
if u > 0xDBFF then malformed s (min j0 j1) 2 else `Hi u
|
||||
|
||||
let r_utf_16_lo hi s j0 j1 = (* Combines [hi] with a low surrogate. *)
|
||||
(* assert (0 <= j0 && 0 <= j1 && max j0 j1 < String.length s); *)
|
||||
let b0 = unsafe_byte s j0 in
|
||||
let b1 = unsafe_byte s j1 in
|
||||
let lo = (b0 lsl 8) lor b1 in
|
||||
if lo < 0xDC00 || lo > 0xDFFF
|
||||
then malformed_pair (j0 < j1 (* true => be *)) hi s (min j0 j1) 2
|
||||
else `Uchar ((((hi land 0x3FF) lsl 10) lor (lo land 0x3FF)) + 0x10000)
|
||||
|
||||
let r_encoding s j l = (* guess encoding with max. 3 bytes. *)
|
||||
(* assert (0 <= j && 0 <= l && j + l <= String.length s) *)
|
||||
let some i = if i < l then Some (unsafe_byte s (j + i)) else None in
|
||||
match (some 0), (some 1), (some 2) with
|
||||
| Some 0xEF, Some 0xBB, Some 0xBF -> `UTF_8 `BOM
|
||||
| Some 0xFE, Some 0xFF, _ -> `UTF_16BE `BOM
|
||||
| Some 0xFF, Some 0xFE, _ -> `UTF_16LE `BOM
|
||||
| Some 0x00, Some p, _ when p > 0 -> `UTF_16BE (`ASCII p)
|
||||
| Some p, Some 0x00, _ when p > 0 -> `UTF_16LE (`ASCII p)
|
||||
| Some u, _, _ when utf_8_len.(u) <> 0 -> `UTF_8 `Decode
|
||||
| Some _, Some _, _ -> `UTF_16BE `Decode
|
||||
| Some _, None , None -> `UTF_8 `Decode
|
||||
| None , None , None -> `UTF_8 `End
|
||||
| None , Some _, _ -> assert false
|
||||
| Some _, None , Some _ -> assert false
|
||||
| None , None , Some _ -> assert false
|
||||
|
||||
(* Decode *)
|
||||
|
||||
type src = [ `Channel of in_channel | `String of string | `Manual ]
|
||||
type nln = [ `ASCII of uchar | `NLF of uchar | `Readline of uchar ]
|
||||
type decode = [ `Await | `End | `Malformed of string | `Uchar of uchar]
|
||||
|
||||
let pp_decode ppf = function
|
||||
| `Uchar u -> pp ppf "@[`Uchar %a@]" pp_cp u
|
||||
| `End -> pp ppf "`End"
|
||||
| `Await -> pp ppf "`Await"
|
||||
| `Malformed bs ->
|
||||
let l = String.length bs in
|
||||
pp ppf "@[`Malformed (";
|
||||
if l > 0 then pp ppf "%02X" (Char.code (bs.[0]));
|
||||
for i = 1 to l - 1 do pp ppf " %02X" (Char.code (bs.[i])) done;
|
||||
pp ppf ")@]"
|
||||
|
||||
type decoder =
|
||||
{ src : src; (* input source. *)
|
||||
mutable encoding : decoder_encoding; (* decoded encoding. *)
|
||||
nln : nln option; (* newline normalization (if any). *)
|
||||
nl : int; (* newline normalization character. *)
|
||||
mutable i : string; (* current input chunk. *)
|
||||
mutable i_pos : int; (* input current position. *)
|
||||
mutable i_max : int; (* input maximal position. *)
|
||||
t : string; (* four bytes temporary buffer for overlapping reads. *)
|
||||
mutable t_len : int; (* current byte length of [t]. *)
|
||||
mutable t_need : int; (* number of bytes needed in [t]. *)
|
||||
mutable removed_bom : bool; (* [true] if an initial BOM was removed. *)
|
||||
mutable last_cr : bool; (* [true] if last char was CR. *)
|
||||
mutable line : int; (* line number. *)
|
||||
mutable col : int; (* column number. *)
|
||||
mutable byte_count : int; (* byte count. *)
|
||||
mutable count : int; (* char count. *)
|
||||
mutable pp : (* decoder post-processor for BOM, position and nln. *)
|
||||
decoder -> [ `Malformed of string | `Uchar of uchar ] -> decode;
|
||||
mutable k : decoder -> decode } (* decoder continuation. *)
|
||||
|
||||
(* On decodes that overlap two (or more) [d.i] buffers, we use [t_fill] to copy
|
||||
the input data to [d.t] and decode from there. If the [d.i] buffers are not
|
||||
too small this is faster than continuation based byte per byte writes.
|
||||
|
||||
End of input (eoi) is signalled by [d.i_pos = 0] and [d.i_max = min_int]
|
||||
which implies that [i_rem d < 0] is [true]. *)
|
||||
|
||||
let i_rem d = d.i_max - d.i_pos + 1 (* remaining bytes to read in [d.i]. *)
|
||||
let eoi d = d.i <- ""; d.i_pos <- 0; d.i_max <- min_int (* set eoi in [d]. *)
|
||||
let src d s j l = (* set [d.i] with [s]. *)
|
||||
if (j < 0 || l < 0 || j + l > String.length s) then invalid_bounds j l else
|
||||
if (l = 0) then eoi d else
|
||||
(d.i <- s; d.i_pos <- j; d.i_max <- j + l - 1)
|
||||
|
||||
let refill k d = match d.src with (* get new input in [d.i] and [k]ontinue. *)
|
||||
| `Manual -> d.k <- k; `Await
|
||||
| `String _ -> eoi d; k d
|
||||
| `Channel ic ->
|
||||
let rc = input ic d.i 0 (String.length d.i) in
|
||||
(src d d.i 0 rc; k d)
|
||||
|
||||
let t_need d need = d.t_len <- 0; d.t_need <- need
|
||||
let rec t_fill k d = (* get [d.t_need] bytes (or less if eoi) in [i.t]. *)
|
||||
let blit d l =
|
||||
unsafe_blit d.i d.i_pos d.t d.t_len (* write pos. *) l;
|
||||
d.i_pos <- d.i_pos + l; d.t_len <- d.t_len + l;
|
||||
in
|
||||
let rem = i_rem d in
|
||||
if rem < 0 (* eoi *) then k d else
|
||||
let need = d.t_need - d.t_len in
|
||||
if rem < need then (blit d rem; refill (t_fill k) d) else (blit d need; k d)
|
||||
|
||||
let ret k v byte_count d = (* return post-processed [v]. *)
|
||||
d.k <- k; d.byte_count <- d.byte_count + byte_count; d.pp d v
|
||||
|
||||
(* Decoders. *)
|
||||
|
||||
let rec decode_us_ascii d =
|
||||
let rem = i_rem d in
|
||||
if rem <= 0 then (if rem < 0 then `End else refill decode_us_ascii d) else
|
||||
let j = d.i_pos in
|
||||
d.i_pos <- d.i_pos + 1; ret decode_us_ascii (r_us_ascii d.i j) 1 d
|
||||
|
||||
let rec decode_iso_8859_1 d =
|
||||
let rem = i_rem d in
|
||||
if rem <= 0 then (if rem < 0 then `End else refill decode_iso_8859_1 d) else
|
||||
let j = d.i_pos in
|
||||
d.i_pos <- d.i_pos + 1; ret decode_iso_8859_1 (r_iso_8859_1 d.i j) 1 d
|
||||
|
||||
(* UTF-8 decoder *)
|
||||
|
||||
let rec t_decode_utf_8 d = (* decode from [d.t]. *)
|
||||
if d.t_len < d.t_need
|
||||
then ret decode_utf_8 (malformed d.t 0 d.t_len) d.t_len d
|
||||
else ret decode_utf_8 (r_utf_8 d.t 0 d.t_len) d.t_len d
|
||||
|
||||
and decode_utf_8 d =
|
||||
let rem = i_rem d in
|
||||
if rem <= 0 then (if rem < 0 then `End else refill decode_utf_8 d) else
|
||||
let need = unsafe_array_get utf_8_len (unsafe_byte d.i d.i_pos) in
|
||||
if rem < need then (t_need d need; t_fill t_decode_utf_8 d) else
|
||||
let j = d.i_pos in
|
||||
if need = 0
|
||||
then (d.i_pos <- d.i_pos + 1; ret decode_utf_8 (malformed d.i j 1) 1 d)
|
||||
else (d.i_pos <- d.i_pos + need; ret decode_utf_8 (r_utf_8 d.i j need) need d)
|
||||
|
||||
(* UTF-16BE decoder *)
|
||||
|
||||
let rec t_decode_utf_16be_lo hi d = (* decode from [d.t]. *)
|
||||
let bcount = d.t_len + 2 (* hi count *) in
|
||||
if d.t_len < d.t_need
|
||||
then ret decode_utf_16be (malformed_pair true hi d.t 0 d.t_len) bcount d
|
||||
else ret decode_utf_16be (r_utf_16_lo hi d.t 0 1) bcount d
|
||||
|
||||
and t_decode_utf_16be d = (* decode from [d.t]. *)
|
||||
if d.t_len < d.t_need
|
||||
then ret decode_utf_16be (malformed d.t 0 d.t_len) d.t_len d
|
||||
else decode_utf_16be_lo (r_utf_16 d.t 0 1) d
|
||||
|
||||
and decode_utf_16be_lo v d = match v with
|
||||
| `Uchar _ | `Malformed _ as v -> ret decode_utf_16be v 2 d
|
||||
| `Hi hi ->
|
||||
let rem = i_rem d in
|
||||
if rem < 2 then (t_need d 2; t_fill (t_decode_utf_16be_lo hi) d) else
|
||||
let j = d.i_pos in
|
||||
d.i_pos <- d.i_pos + 2;
|
||||
ret decode_utf_16be (r_utf_16_lo hi d.i j (j + 1)) 4 d
|
||||
|
||||
and decode_utf_16be d =
|
||||
let rem = i_rem d in
|
||||
if rem <= 0 then (if rem < 0 then `End else refill decode_utf_16be d) else
|
||||
if rem < 2 then (t_need d 2; t_fill t_decode_utf_16be d) else
|
||||
let j = d.i_pos in
|
||||
d.i_pos <- d.i_pos + 2; decode_utf_16be_lo (r_utf_16 d.i j (j + 1)) d
|
||||
|
||||
(* UTF-16LE decoder, same as UTF-16BE with byte swapped. *)
|
||||
|
||||
let rec t_decode_utf_16le_lo hi d = (* decode from [d.t]. *)
|
||||
let bcount = d.t_len + 2 (* hi count *) in
|
||||
if d.t_len < d.t_need
|
||||
then ret decode_utf_16le (malformed_pair false hi d.t 0 d.t_len) bcount d
|
||||
else ret decode_utf_16le (r_utf_16_lo hi d.t 1 0) bcount d
|
||||
|
||||
and t_decode_utf_16le d = (* decode from [d.t]. *)
|
||||
if d.t_len < d.t_need
|
||||
then ret decode_utf_16le (malformed d.t 0 d.t_len) d.t_len d
|
||||
else decode_utf_16le_lo (r_utf_16 d.t 1 0) d
|
||||
|
||||
and decode_utf_16le_lo v d = match v with
|
||||
| `Uchar _ | `Malformed _ as v -> ret decode_utf_16le v 2 d
|
||||
| `Hi hi ->
|
||||
let rem = i_rem d in
|
||||
if rem < 2 then (t_need d 2; t_fill (t_decode_utf_16le_lo hi) d) else
|
||||
let j = d.i_pos in
|
||||
d.i_pos <- d.i_pos + 2;
|
||||
ret decode_utf_16le (r_utf_16_lo hi d.i (j + 1) j) 4 d
|
||||
|
||||
and decode_utf_16le d =
|
||||
let rem = i_rem d in
|
||||
if rem <= 0 then (if rem < 0 then `End else refill decode_utf_16le d) else
|
||||
if rem < 2 then (t_need d 2; t_fill t_decode_utf_16le d) else
|
||||
let j = d.i_pos in
|
||||
d.i_pos <- d.i_pos + 2; decode_utf_16le_lo (r_utf_16 d.i (j + 1) j) d
|
||||
|
||||
(* Encoding guessing. The guess is simple but starting the decoder
|
||||
after is tedious, uutf's decoders are not designed to put bytes
|
||||
back in the stream. *)
|
||||
|
||||
let guessed_utf_8 d = (* start decoder after `UTF_8 guess. *)
|
||||
let b3 d = (* handles the third read byte. *)
|
||||
let b3 = unsafe_byte d.t 2 in
|
||||
match utf_8_len.(b3) with
|
||||
| 0 -> ret decode_utf_8 (malformed d.t 2 1) 1 d
|
||||
| n ->
|
||||
d.t_need <- n; d.t_len <- 1; unsafe_set_byte d.t 0 b3;
|
||||
t_fill t_decode_utf_8 d
|
||||
in
|
||||
let b2 d = (* handle second read byte. *)
|
||||
let b2 = unsafe_byte d.t 1 in
|
||||
let b3 = if d.t_len > 2 then b3 else decode_utf_8 (* decodes `End *) in
|
||||
match utf_8_len.(b2) with
|
||||
| 0 -> ret b3 (malformed d.t 1 1) 1 d
|
||||
| 1 -> ret b3 (r_utf_8 d.t 1 1) 1 d
|
||||
| n -> (* copy d.t.(1-2) to d.t.(0-1) and decode *)
|
||||
d.t_need <- n;
|
||||
unsafe_set_byte d.t 0 b2;
|
||||
if (d.t_len < 3) then d.t_len <- 1 else
|
||||
(d.t_len <- 2; unsafe_set_byte d.t 1 (unsafe_byte d.t 2); );
|
||||
t_fill t_decode_utf_8 d
|
||||
in
|
||||
let b1 = unsafe_byte d.t 0 in (* handle first read byte. *)
|
||||
let b2 = if d.t_len > 1 then b2 else decode_utf_8 (* decodes `End *) in
|
||||
match utf_8_len.(b1) with
|
||||
| 0 -> ret b2 (malformed d.t 0 1) 1 d
|
||||
| 1 -> ret b2 (r_utf_8 d.t 0 1) 1 d
|
||||
| 2 ->
|
||||
if d.t_len < 2 then ret decode_utf_8 (malformed d.t 0 1) 1 d else
|
||||
if d.t_len < 3 then ret decode_utf_8 (r_utf_8 d.t 0 2) 2 d else
|
||||
ret b3 (r_utf_8 d.t 0 2) 2 d
|
||||
| 3 ->
|
||||
if d.t_len < 3
|
||||
then ret decode_utf_8 (malformed d.t 0 d.t_len) d.t_len d
|
||||
else ret decode_utf_8 (r_utf_8 d.t 0 3) 3 d
|
||||
| 4 ->
|
||||
if d.t_len < 3
|
||||
then ret decode_utf_8 (malformed d.t 0 d.t_len) d.t_len d
|
||||
else (d.t_need <- 4; t_fill t_decode_utf_8 d)
|
||||
| n -> assert false
|
||||
|
||||
let guessed_utf_16 d be v = (* start decoder after `UTF_16{BE,LE} guess. *)
|
||||
let decode_utf_16, t_decode_utf_16, t_decode_utf_16_lo, j0, j1 =
|
||||
if be then decode_utf_16be, t_decode_utf_16be, t_decode_utf_16be_lo, 0, 1
|
||||
else decode_utf_16le, t_decode_utf_16le, t_decode_utf_16le_lo, 1, 0
|
||||
in
|
||||
let b3 k d =
|
||||
if d.t_len < 3 then decode_utf_16 d (* decodes `End *) else
|
||||
begin (* copy d.t.(2) to d.t.(0) and decode. *)
|
||||
d.t_need <- 2; d.t_len <- 1;
|
||||
unsafe_set_byte d.t 0 (unsafe_byte d.t 2);
|
||||
t_fill k d
|
||||
end
|
||||
in
|
||||
match v with
|
||||
| `BOM -> ret (b3 t_decode_utf_16) (`Uchar u_bom) 2 d
|
||||
| `ASCII u -> ret (b3 t_decode_utf_16) (`Uchar u) 2 d
|
||||
| `Decode ->
|
||||
match r_utf_16 d.t j0 j1 with
|
||||
| `Malformed _ | `Uchar _ as v -> ret (b3 t_decode_utf_16) v 2 d
|
||||
| `Hi hi ->
|
||||
if d.t_len < 3
|
||||
then ret decode_utf_16 (malformed_pair be hi "" 0 0) d.t_len d
|
||||
else (b3 (t_decode_utf_16_lo hi)) d
|
||||
|
||||
let guess_encoding d = (* guess encoding and start decoder. *)
|
||||
let setup d = match r_encoding d.t 0 d.t_len with
|
||||
| `UTF_8 r ->
|
||||
d.encoding <- `UTF_8; d.k <- decode_utf_8;
|
||||
begin match r with
|
||||
| `BOM -> ret decode_utf_8 (`Uchar u_bom) 3 d
|
||||
| `Decode -> guessed_utf_8 d
|
||||
| `End -> `End
|
||||
end
|
||||
| `UTF_16BE r ->
|
||||
d.encoding <- `UTF_16BE; d.k <- decode_utf_16be; guessed_utf_16 d true r
|
||||
| `UTF_16LE r ->
|
||||
d.encoding <- `UTF_16LE; d.k <- decode_utf_16le; guessed_utf_16 d false r
|
||||
|
||||
in
|
||||
(t_need d 3; t_fill setup d)
|
||||
|
||||
(* Character post-processors. Used for BOM handling, newline
|
||||
normalization and position tracking. The [pp_remove_bom] is only
|
||||
used for the first character to remove a possible initial BOM and
|
||||
handle UTF-16 endianness recognition. *)
|
||||
|
||||
let nline d = d.col <- 0; d.line <- d.line + 1 (* inlined. *)
|
||||
let ncol d = d.col <- d.col + 1 (* inlined. *)
|
||||
let ncount d = d.count <- d.count + 1 (* inlined. *)
|
||||
let cr d b = d.last_cr <- b (* inlined. *)
|
||||
|
||||
let pp_remove_bom utf16 pp d = function(* removes init. BOM, handles UTF-16. *)
|
||||
| `Uchar 0xFEFF (* BOM *) ->
|
||||
if utf16 then (d.encoding <- `UTF_16BE; d.k <- decode_utf_16be);
|
||||
d.removed_bom <- true; d.pp <- pp; d.k d
|
||||
| `Uchar 0xFFFE (* BOM reversed from decode_utf_16be *) when utf16 ->
|
||||
d.encoding <- `UTF_16LE; d.k <- decode_utf_16le;
|
||||
d.removed_bom <- true; d.pp <- pp; d.k d
|
||||
| `Malformed _ | `Uchar _ as v ->
|
||||
d.removed_bom <- false; d.pp <- pp; d.pp d v
|
||||
|
||||
let pp_nln_none d = function
|
||||
| `Uchar 0x000A (* LF *) as v ->
|
||||
let last_cr = d.last_cr in
|
||||
cr d false; ncount d; if last_cr then v else (nline d; v)
|
||||
| `Uchar 0x000D (* CR *) as v -> cr d true; ncount d; nline d; v
|
||||
| `Uchar (0x0085 | 0x000C | 0x2028 | 0x2029) (* NEL | FF | LS | PS *) as v ->
|
||||
cr d false; ncount d; nline d; v
|
||||
| `Uchar _ | `Malformed _ as v -> cr d false; ncount d; ncol d; v
|
||||
|
||||
let pp_nln_readline d = function
|
||||
| `Uchar 0x000A (* LF *) ->
|
||||
let last_cr = d.last_cr in
|
||||
cr d false; if last_cr then d.k d else (ncount d; nline d; `Uchar d.nl)
|
||||
| `Uchar 0x000D (* CR *) -> cr d true; ncount d; nline d; `Uchar d.nl
|
||||
| `Uchar (0x0085 | 0x000C | 0x2028 | 0x2029) (* NEL | FF | LS | PS *) ->
|
||||
cr d false; ncount d; nline d; `Uchar d.nl
|
||||
| `Uchar _ | `Malformed _ as v -> cr d false; ncount d; ncol d; v
|
||||
|
||||
let pp_nln_nlf d = function
|
||||
| `Uchar 0x000A (* LF *) ->
|
||||
let last_cr = d.last_cr in
|
||||
cr d false; if last_cr then d.k d else (ncount d; nline d; `Uchar d.nl)
|
||||
| `Uchar 0x000D (* CR *) -> cr d true; ncount d; nline d; `Uchar d.nl
|
||||
| `Uchar 0x0085 (* NEL *) -> cr d false; ncount d; nline d; `Uchar d.nl
|
||||
| `Uchar (0x000C | 0x2028 | 0x2029) as v (* FF | LS | PS *) ->
|
||||
cr d false; ncount d; nline d; v
|
||||
| `Uchar _ | `Malformed _ as v -> cr d false; ncount d; ncol d; v
|
||||
|
||||
let pp_nln_ascii d = function
|
||||
| `Uchar 0x000A (* LF *) ->
|
||||
let last_cr = d.last_cr in
|
||||
cr d false; if last_cr then d.k d else (ncount d; nline d; `Uchar d.nl)
|
||||
| `Uchar 0x000D (* CR *) -> cr d true; ncount d; nline d; `Uchar d.nl
|
||||
| `Uchar (0x0085 | 0x000C | 0x2028 | 0x2029) as v (* NEL | FF | LS | PS *) ->
|
||||
cr d false; ncount d; nline d; v
|
||||
| `Uchar _ | `Malformed _ as v -> cr d false; ncount d; ncol d; v
|
||||
|
||||
let decode_fun = function
|
||||
| `UTF_8 -> decode_utf_8
|
||||
| `UTF_16 -> decode_utf_16be (* see [pp_remove_bom]. *)
|
||||
| `UTF_16BE -> decode_utf_16be
|
||||
| `UTF_16LE -> decode_utf_16le
|
||||
| `US_ASCII -> decode_us_ascii
|
||||
| `ISO_8859_1 -> decode_iso_8859_1
|
||||
|
||||
let decoder ?nln ?encoding src =
|
||||
let pp, nl = match nln with
|
||||
| None -> pp_nln_none, 0x000A (* not used. *)
|
||||
| Some (`ASCII nl) -> pp_nln_ascii, nl
|
||||
| Some (`NLF nl) -> pp_nln_nlf, nl
|
||||
| Some (`Readline nl) -> pp_nln_readline, nl
|
||||
in
|
||||
let encoding, k = match encoding with
|
||||
| None -> `UTF_8, guess_encoding
|
||||
| Some e -> (e :> decoder_encoding), decode_fun e
|
||||
in
|
||||
let i, i_pos, i_max = match src with
|
||||
| `Manual -> "", 1, 0 (* implies src_rem d = 0. *)
|
||||
| `Channel _ -> String.create io_buffer_size, 1, 0 (* idem. *)
|
||||
| `String s -> s, 0, String.length s - 1
|
||||
in
|
||||
{ src = (src :> src); encoding; nln = (nln :> nln option); nl;
|
||||
i; i_pos; i_max; t = String.create 4; t_len = 0; t_need = 0;
|
||||
removed_bom = false; last_cr = false; line = 1; col = 0;
|
||||
byte_count = 0; count = 0;
|
||||
pp = pp_remove_bom (encoding = `UTF_16) pp; k }
|
||||
|
||||
let decode d = d.k d
|
||||
let decoder_line d = d.line
|
||||
let decoder_col d = d.col
|
||||
let decoder_byte_count d = d.byte_count
|
||||
let decoder_count d = d.count
|
||||
let decoder_removed_bom d = d.removed_bom
|
||||
let decoder_src d = d.src
|
||||
let decoder_nln d = d.nln
|
||||
let decoder_encoding d = d.encoding
|
||||
let set_decoder_encoding d e =
|
||||
d.encoding <- (e :> decoder_encoding); d.k <- decode_fun e
|
||||
|
||||
(* Encode *)
|
||||
|
||||
type dst = [ `Channel of out_channel | `Buffer of Buffer.t | `Manual ]
|
||||
type encode = [ `Await | `End | `Uchar of uchar ]
|
||||
type encoder =
|
||||
{ dst : dst; (* output destination. *)
|
||||
encoding : encoding; (* encoded encoding. *)
|
||||
mutable o : string; (* current output chunk. *)
|
||||
mutable o_pos : int; (* next output position to write. *)
|
||||
mutable o_max : int; (* maximal output position to write. *)
|
||||
t : string; (* four bytes buffer for overlapping writes. *)
|
||||
mutable t_pos : int; (* next position to read in [t]. *)
|
||||
mutable t_max : int; (* maximal position to read in [t]. *)
|
||||
mutable k : (* encoder continuation. *)
|
||||
encoder -> encode -> [ `Ok | `Partial ] }
|
||||
|
||||
(* On encodes that overlap two (or more) [e.o] buffers, we encode the
|
||||
character to the temporary buffer [o.t] and continue with
|
||||
[tmp_flush] to write this data on the different [e.o] buffers. If
|
||||
the [e.o] buffers are not too small this is faster than
|
||||
continuation based byte per byte writes. *)
|
||||
|
||||
let o_rem e = e.o_max - e.o_pos + 1 (* remaining bytes to write in [e.o]. *)
|
||||
let dst e s j l = (* set [e.o] with [s]. *)
|
||||
if (j < 0 || l < 0 || j + l > String.length s) then invalid_bounds j l;
|
||||
e.o <- s; e.o_pos <- j; e.o_max <- j + l - 1
|
||||
|
||||
let partial k e = function `Await -> k e | `Uchar _ | `End -> invalid_encode ()
|
||||
let flush k e = match e.dst with(* get free storage in [d.o] and [k]ontinue. *)
|
||||
| `Manual -> e.k <- partial k; `Partial
|
||||
| `Buffer b -> Buffer.add_substring b e.o 0 e.o_pos; e.o_pos <- 0; k e
|
||||
| `Channel oc -> output oc e.o 0 e.o_pos; e.o_pos <- 0; k e
|
||||
|
||||
let t_range e max = e.t_pos <- 0; e.t_max <- max
|
||||
let rec t_flush k e = (* flush [d.t] up to [d.t_max] in [d.i]. *)
|
||||
let blit e l =
|
||||
unsafe_blit e.t e.t_pos e.o e.o_pos l;
|
||||
e.o_pos <- e.o_pos + l; e.t_pos <- e.t_pos + l
|
||||
in
|
||||
let rem = o_rem e in
|
||||
let len = e.t_max - e.t_pos + 1 in
|
||||
if rem < len then (blit e rem; flush (t_flush k) e) else (blit e len; k e)
|
||||
|
||||
(* Encoders. *)
|
||||
|
||||
let rec encode_utf_8 e v =
|
||||
let k e = e.k <- encode_utf_8; `Ok in
|
||||
match v with
|
||||
| `Await -> k e
|
||||
| `End -> flush k e
|
||||
| `Uchar u as v ->
|
||||
let rem = o_rem e in
|
||||
if u <= 0x007F then
|
||||
if rem < 1 then flush (fun e -> encode_utf_8 e v) e else
|
||||
(unsafe_set_byte e.o e.o_pos u; e.o_pos <- e.o_pos + 1; k e)
|
||||
else if u <= 0x07FF then
|
||||
begin
|
||||
let s, j, k =
|
||||
if rem < 2 then (t_range e 1; e.t, 0, t_flush k) else
|
||||
let j = e.o_pos in (e.o_pos <- e.o_pos + 2; e.o, j, k)
|
||||
in
|
||||
unsafe_set_byte s j (0xC0 lor (u lsr 6));
|
||||
unsafe_set_byte s (j + 1) (0x80 lor (u land 0x3F));
|
||||
k e
|
||||
end
|
||||
else if u <= 0xFFFF then
|
||||
begin
|
||||
let s, j, k =
|
||||
if rem < 3 then (t_range e 2; e.t, 0, t_flush k) else
|
||||
let j = e.o_pos in (e.o_pos <- e.o_pos + 3; e.o, j, k)
|
||||
in
|
||||
unsafe_set_byte s j (0xE0 lor (u lsr 12));
|
||||
unsafe_set_byte s (j + 1) (0x80 lor ((u lsr 6) land 0x3F));
|
||||
unsafe_set_byte s (j + 2) (0x80 lor (u land 0x3F));
|
||||
k e
|
||||
end
|
||||
else
|
||||
begin
|
||||
let s, j, k =
|
||||
if rem < 4 then (t_range e 3; e.t, 0, t_flush k) else
|
||||
let j = e.o_pos in (e.o_pos <- e.o_pos + 4; e.o, j, k)
|
||||
in
|
||||
unsafe_set_byte s j (0xF0 lor (u lsr 18));
|
||||
unsafe_set_byte s (j + 1) (0x80 lor ((u lsr 12) land 0x3F));
|
||||
unsafe_set_byte s (j + 2) (0x80 lor ((u lsr 6) land 0x3F));
|
||||
unsafe_set_byte s (j + 3) (0x80 lor (u land 0x3F));
|
||||
k e
|
||||
end
|
||||
|
||||
let rec encode_utf_16be e v =
|
||||
let k e = e.k <- encode_utf_16be; `Ok in
|
||||
match v with
|
||||
| `Await -> k e
|
||||
| `End -> flush k e
|
||||
| `Uchar u ->
|
||||
let rem = o_rem e in
|
||||
if u < 0x10000 then
|
||||
begin
|
||||
let s, j, k =
|
||||
if rem < 2 then (t_range e 1; e.t, 0, t_flush k) else
|
||||
let j = e.o_pos in (e.o_pos <- e.o_pos + 2; e.o, j, k)
|
||||
in
|
||||
unsafe_set_byte s j (u lsr 8);
|
||||
unsafe_set_byte s (j + 1) (u land 0xFF);
|
||||
k e
|
||||
end else begin
|
||||
let s, j, k =
|
||||
if rem < 4 then (t_range e 3; e.t, 0, t_flush k) else
|
||||
let j = e.o_pos in (e.o_pos <- e.o_pos + 4; e.o, j, k)
|
||||
in
|
||||
let u' = u - 0x10000 in
|
||||
let hi = (0xD800 lor (u' lsr 10)) in
|
||||
let lo = (0xDC00 lor (u' land 0x3FF)) in
|
||||
unsafe_set_byte s j (hi lsr 8);
|
||||
unsafe_set_byte s (j + 1) (hi land 0xFF);
|
||||
unsafe_set_byte s (j + 2) (lo lsr 8);
|
||||
unsafe_set_byte s (j + 3) (lo land 0xFF);
|
||||
k e
|
||||
end
|
||||
|
||||
let rec encode_utf_16le e v = (* encode_uft_16be with bytes swapped. *)
|
||||
let k e = e.k <- encode_utf_16le; `Ok in
|
||||
match v with
|
||||
| `Await -> k e
|
||||
| `End -> flush k e
|
||||
| `Uchar u ->
|
||||
let rem = o_rem e in
|
||||
if u < 0x10000 then
|
||||
begin
|
||||
let s, j, k =
|
||||
if rem < 2 then (t_range e 1; e.t, 0, t_flush k) else
|
||||
let j = e.o_pos in (e.o_pos <- e.o_pos + 2; e.o, j, k)
|
||||
in
|
||||
unsafe_set_byte s j (u land 0xFF);
|
||||
unsafe_set_byte s (j + 1) (u lsr 8);
|
||||
k e
|
||||
end
|
||||
else
|
||||
begin
|
||||
let s, j, k =
|
||||
if rem < 4 then (t_range e 3; e.t, 0, t_flush k) else
|
||||
let j = e.o_pos in (e.o_pos <- e.o_pos + 4; e.o, j, k)
|
||||
in
|
||||
let u' = u - 0x10000 in
|
||||
let hi = (0xD800 lor (u' lsr 10)) in
|
||||
let lo = (0xDC00 lor (u' land 0x3FF)) in
|
||||
unsafe_set_byte s j (hi land 0xFF);
|
||||
unsafe_set_byte s (j + 1) (hi lsr 8);
|
||||
unsafe_set_byte s (j + 2) (lo land 0xFF);
|
||||
unsafe_set_byte s (j + 3) (lo lsr 8);
|
||||
k e
|
||||
end
|
||||
|
||||
let encode_fun = function
|
||||
| `UTF_8 -> encode_utf_8
|
||||
| `UTF_16 -> encode_utf_16be
|
||||
| `UTF_16BE -> encode_utf_16be
|
||||
| `UTF_16LE -> encode_utf_16le
|
||||
|
||||
let encoder encoding dst =
|
||||
let o, o_pos, o_max = match dst with
|
||||
| `Manual -> "", 1, 0 (* implies o_rem e = 0. *)
|
||||
| `Buffer _
|
||||
| `Channel _ -> String.create io_buffer_size, 0, io_buffer_size - 1
|
||||
in
|
||||
{ dst = (dst :> dst); encoding = (encoding :> encoding); o; o_pos; o_max;
|
||||
t = String.create 4; t_pos = 1; t_max = 0; k = encode_fun encoding}
|
||||
|
||||
let encode e v = e.k e (v :> encode)
|
||||
let encoder_encoding e = e.encoding
|
||||
let encoder_dst e = e.dst
|
||||
|
||||
(* Manual sources and destinations. *)
|
||||
|
||||
module Manual = struct
|
||||
let src = src
|
||||
let dst = dst
|
||||
let dst_rem = o_rem
|
||||
end
|
||||
|
||||
(* Strings folders and Buffer encoders *)
|
||||
|
||||
module String = struct
|
||||
let encoding_guess s = match r_encoding s 0 (max (String.length s) 3) with
|
||||
| `UTF_8 d -> `UTF_8, (d = `BOM)
|
||||
| `UTF_16BE d -> `UTF_16BE, (d = `BOM)
|
||||
| `UTF_16LE d -> `UTF_16LE, (d = `BOM)
|
||||
|
||||
type 'a folder =
|
||||
'a -> int -> [ `Uchar of uchar | `Malformed of string ] -> 'a
|
||||
|
||||
let fold_utf_8 f acc s =
|
||||
let rec loop acc f s i l =
|
||||
if i = l then acc else
|
||||
let need = unsafe_array_get utf_8_len (unsafe_byte s i) in
|
||||
if need = 0 then loop (f acc i (malformed s i 1)) f s (i + 1) l else
|
||||
let rem = l - i in
|
||||
if rem < need then f acc i (malformed s i rem) else
|
||||
loop (f acc i (r_utf_8 s i need)) f s (i + need) l
|
||||
in
|
||||
loop acc f s 0 (String.length s)
|
||||
|
||||
let fold_utf_16be f acc s =
|
||||
let rec loop acc f s i l =
|
||||
if i = l then acc else
|
||||
let rem = l - i in
|
||||
if rem < 2 then f acc i (malformed s i 1) else
|
||||
match r_utf_16 s i (i + 1) with
|
||||
| `Uchar _ | `Malformed _ as v -> loop (f acc i v) f s (i + 2) l
|
||||
| `Hi hi ->
|
||||
if rem < 4 then f acc i (malformed s i rem) else
|
||||
loop (f acc i (r_utf_16_lo hi s (i + 2) (i + 3))) f s (i + 4) l
|
||||
in
|
||||
loop acc f s 0 (String.length s)
|
||||
|
||||
let fold_utf_16le f acc s = (* [fold_utf_16be], bytes swapped. *)
|
||||
let rec loop acc f s i l =
|
||||
if i = l then acc else
|
||||
let rem = l - i in
|
||||
if rem < 2 then f acc i (malformed s i 1) else
|
||||
match r_utf_16 s (i + 1) i with
|
||||
| `Uchar _ | `Malformed _ as v -> loop (f acc i v) f s (i + 2) l
|
||||
| `Hi hi ->
|
||||
if rem < 4 then f acc i (malformed s i rem) else
|
||||
loop (f acc i (r_utf_16_lo hi s (i + 3) (i + 2))) f s (i + 4) l
|
||||
in
|
||||
loop acc f s 0 (String.length s)
|
||||
end
|
||||
|
||||
module Buffer = struct
|
||||
let add_utf_8 b u =
|
||||
let w byte = Buffer.add_char b (unsafe_chr byte) in (* inlined. *)
|
||||
if u <= 0x007F then
|
||||
(w u)
|
||||
else if u <= 0x07FF then
|
||||
(w (0xC0 lor (u lsr 6));
|
||||
w (0x80 lor (u land 0x3F)))
|
||||
else if u <= 0xFFFF then
|
||||
(w (0xE0 lor (u lsr 12));
|
||||
w (0x80 lor ((u lsr 6) land 0x3F));
|
||||
w (0x80 lor (u land 0x3F)))
|
||||
else
|
||||
(w (0xF0 lor (u lsr 18));
|
||||
w (0x80 lor ((u lsr 12) land 0x3F));
|
||||
w (0x80 lor ((u lsr 6) land 0x3F));
|
||||
w (0x80 lor (u land 0x3F)))
|
||||
|
||||
let add_utf_16be b u =
|
||||
let w byte = Buffer.add_char b (unsafe_chr byte) in (* inlined. *)
|
||||
if u < 0x10000 then (w (u lsr 8); w (u land 0xFF)) else
|
||||
let u' = u - 0x10000 in
|
||||
let hi = (0xD800 lor (u' lsr 10)) in
|
||||
let lo = (0xDC00 lor (u' land 0x3FF)) in
|
||||
w (hi lsr 8); w (hi land 0xFF);
|
||||
w (lo lsr 8); w (lo land 0xFF)
|
||||
|
||||
let add_utf_16le b u = (* swapped add_utf_16be. *)
|
||||
let w byte = Buffer.add_char b (unsafe_chr byte) in (* inlined. *)
|
||||
if u < 0x10000 then (w (u land 0xFF); w (u lsr 8)) else
|
||||
let u' = u - 0x10000 in
|
||||
let hi = (0xD800 lor (u' lsr 10)) in
|
||||
let lo = (0xDC00 lor (u' land 0x3FF)) in
|
||||
w (hi land 0xFF); w (hi lsr 8);
|
||||
w (lo land 0xFF); w (lo lsr 8)
|
||||
end
|
||||
|
||||
(*---------------------------------------------------------------------------
|
||||
Copyright 2012 Daniel C. Bünzli
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials provided
|
||||
with the distribution.
|
||||
|
||||
3. Neither the name of Daniel C. Bünzli nor the names of
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
---------------------------------------------------------------------------*)
|
||||
Reference in New Issue
Block a user