mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	| @@ -261,5 +261,14 @@ module Linguist | |||||||
|         Language["Makefile"] |         Language["Makefile"] | ||||||
|       end |       end | ||||||
|     end |     end | ||||||
|  |  | ||||||
|  |     disambiguate "OCaml", "Standard ML" do |data| | ||||||
|  |       if /module|let rec |match\s+(\S+\s)+with/.match(data) | ||||||
|  |         Language["OCaml"] | ||||||
|  |       elsif /=> |case\s+(\S+\s)+of/.match(data) | ||||||
|  |         Language["Standard ML"] | ||||||
|  |       end | ||||||
|  |     end | ||||||
|  |  | ||||||
|   end |   end | ||||||
| end | end | ||||||
|   | |||||||
							
								
								
									
										1344
									
								
								samples/OCaml/cmdliner.ml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1344
									
								
								samples/OCaml/cmdliner.ml
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										14
									
								
								samples/OCaml/common.ml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								samples/OCaml/common.ml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,14 @@ | |||||||
|  | (* | ||||||
|  |  * Copyright (c) 2013 Jeremy Yallop. | ||||||
|  |  * | ||||||
|  |  * This file is distributed under the terms of the MIT License. | ||||||
|  |  * See the file LICENSE for details. | ||||||
|  |  *) | ||||||
|  |  | ||||||
|  | (* [string_of format v] runs the printer [format] on [v] against a formatter | ||||||
|  |    backed by a fresh buffer, flushes it, and returns the buffer contents. *) | ||||||
|  | let string_of format v = | ||||||
|  |   let storage = Buffer.create 100 in | ||||||
|  |   let ppf = Format.formatter_of_buffer storage in | ||||||
|  |   format ppf v; | ||||||
|  |   Format.pp_print_flush ppf (); | ||||||
|  |   Buffer.contents storage | ||||||
							
								
								
									
										40
									
								
								samples/OCaml/date.ml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								samples/OCaml/date.ml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,40 @@ | |||||||
|  | (* | ||||||
|  |  * Copyright (c) 2013 Jeremy Yallop. | ||||||
|  |  * | ||||||
|  |  * This file is distributed under the terms of the MIT License. | ||||||
|  |  * See the file LICENSE for details. | ||||||
|  |  *) | ||||||
|  |  | ||||||
|  | open Ctypes | ||||||
|  | open PosixTypes | ||||||
|  | open Foreign | ||||||
|  |  | ||||||
|  | (* Description of a structure named "tm": an abstract tag type, the structure | ||||||
|  |    value, one [field] call per member in the order written here, and a final | ||||||
|  |    [seal].  NOTE(review): presumably mirrors C's [struct tm] -- confirm the | ||||||
|  |    member order against the target platform's <time.h>. *) | ||||||
|  | type tm | ||||||
|  | let tm = structure "tm" | ||||||
|  | (* [ty -: label] is shorthand for [field tm label ty] (arguments flipped). *) | ||||||
|  | let (-:) ty label = field tm label ty | ||||||
|  | let tm_sec   = int -: "tm_sec"   (* seconds *) | ||||||
|  | let tm_min   = int -: "tm_min"   (* minutes *) | ||||||
|  | let tm_hour  = int -: "tm_hour"  (* hours *) | ||||||
|  | let tm_mday  = int -: "tm_mday"  (* day of the month *) | ||||||
|  | let tm_mon   = int -: "tm_mon"   (* month *) | ||||||
|  | let tm_year  = int -: "tm_year"  (* year *) | ||||||
|  | let tm_wday  = int -: "tm_wday"  (* day of the week *) | ||||||
|  | let tm_yday  = int -: "tm_yday"  (* day in the year *) | ||||||
|  | let tm_isdst = int -: "tm_isdst" (* daylight saving time *) | ||||||
|  | (* No further members are declared after this point. *) | ||||||
|  | let () = seal (tm : tm structure typ) | ||||||
|  |  | ||||||
|  | (* Values obtained from [foreign]; each call is given a symbol name and a | ||||||
|  |    type expression built with [@->] and [returning]. | ||||||
|  |    NOTE(review): [~check_errno:true] presumably turns a non-zero errno into | ||||||
|  |    an exception -- confirm in the ctypes documentation. *) | ||||||
|  | let time = foreign "time" ~check_errno:true (ptr time_t @-> returning time_t) | ||||||
|  |  | ||||||
|  | let asctime = foreign "asctime" (ptr tm @-> returning string) | ||||||
|  |  | ||||||
|  | let localtime = foreign "localtime" (ptr time_t @-> returning (ptr tm)) | ||||||
|  |  | ||||||
|  | (* Entry point: calls [time] on a freshly allocated [time_t] cell, asserts | ||||||
|  |    that the returned value equals the one stored in the cell, then prints two | ||||||
|  |    fields of the [localtime] result followed by its [asctime] rendering. *) | ||||||
|  | let () = | ||||||
|  |   let timep = allocate_n ~count:1 time_t in | ||||||
|  |   let now = time timep in | ||||||
|  |   assert (now = !@timep); | ||||||
|  |   let broken_down = localtime timep in | ||||||
|  |   Printf.printf "tm.tm_mon  = %d\n" (getf !@broken_down tm_mon); | ||||||
|  |   Printf.printf "tm.tm_year = %d\n" (getf !@broken_down tm_year); | ||||||
|  |   print_endline (asctime broken_down) | ||||||
							
								
								
									
										337
									
								
								samples/OCaml/map.ml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										337
									
								
								samples/OCaml/map.ml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,337 @@ | |||||||
|  | (***********************************************************************) | ||||||
|  | (*                                                                     *) | ||||||
|  | (*                                OCaml                                *) | ||||||
|  | (*                                                                     *) | ||||||
|  | (*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *) | ||||||
|  | (*                                                                     *) | ||||||
|  | (*  Copyright 1996 Institut National de Recherche en Informatique et   *) | ||||||
|  | (*  en Automatique.  All rights reserved.  This file is distributed    *) | ||||||
|  | (*  under the terms of the GNU Library General Public License, with    *) | ||||||
|  | (*  the special exception on linking described in file ../LICENSE.     *) | ||||||
|  | (*                                                                     *) | ||||||
|  | (***********************************************************************) | ||||||
|  |  | ||||||
|  | (* Input signature of the functor [Make]: the key type together with the | ||||||
|  |    comparison function applied to keys. *) | ||||||
|  | module type OrderedType = | ||||||
|  |   sig | ||||||
|  |     type t | ||||||
|  |     (* [Make] treats a result of 0 as "equal keys" and a negative result as | ||||||
|  |        "first argument goes to the left subtree". *) | ||||||
|  |     val compare: t -> t -> int | ||||||
|  |   end | ||||||
|  |  | ||||||
|  | (* Signature listing the map operations: an abstract key type, an abstract | ||||||
|  |    map type ['a t] (declared covariant in ['a]) and the functions over it. *) | ||||||
|  | module type S = | ||||||
|  |   sig | ||||||
|  |     type key | ||||||
|  |     type +'a t | ||||||
|  |     val empty: 'a t | ||||||
|  |     val is_empty: 'a t -> bool | ||||||
|  |     val mem:  key -> 'a t -> bool | ||||||
|  |     val add: key -> 'a -> 'a t -> 'a t | ||||||
|  |     val singleton: key -> 'a -> 'a t | ||||||
|  |     val remove: key -> 'a t -> 'a t | ||||||
|  |     (* The callback receives the key and the optional value from each map. *) | ||||||
|  |     val merge: | ||||||
|  |           (key -> 'a option -> 'b option -> 'c option) -> 'a t -> 'b t -> 'c t | ||||||
|  |     val compare: ('a -> 'a -> int) -> 'a t -> 'a t -> int | ||||||
|  |     val equal: ('a -> 'a -> bool) -> 'a t -> 'a t -> bool | ||||||
|  |     val iter: (key -> 'a -> unit) -> 'a t -> unit | ||||||
|  |     val fold: (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b | ||||||
|  |     val for_all: (key -> 'a -> bool) -> 'a t -> bool | ||||||
|  |     val exists: (key -> 'a -> bool) -> 'a t -> bool | ||||||
|  |     val filter: (key -> 'a -> bool) -> 'a t -> 'a t | ||||||
|  |     val partition: (key -> 'a -> bool) -> 'a t -> 'a t * 'a t | ||||||
|  |     val cardinal: 'a t -> int | ||||||
|  |     val bindings: 'a t -> (key * 'a) list | ||||||
|  |     (* In [Make], [min_binding], [max_binding] and [find] raise [Not_found] | ||||||
|  |        when there is nothing to return; [choose] is [min_binding]. *) | ||||||
|  |     val min_binding: 'a t -> (key * 'a) | ||||||
|  |     val max_binding: 'a t -> (key * 'a) | ||||||
|  |     val choose: 'a t -> (key * 'a) | ||||||
|  |     val split: key -> 'a t -> 'a t * 'a option * 'a t | ||||||
|  |     val find: key -> 'a t -> 'a | ||||||
|  |     val map: ('a -> 'b) -> 'a t -> 'b t | ||||||
|  |     val mapi: (key -> 'a -> 'b) -> 'a t -> 'b t | ||||||
|  |   end | ||||||
|  |  | ||||||
|  | module Make(Ord: OrderedType) = struct | ||||||
|  |  | ||||||
|  |     (* Keys are the values of the functor argument's type. *) | ||||||
|  |     type key = Ord.t | ||||||
|  |  | ||||||
|  |     (* A tree is either empty or a node carrying, in this order: left | ||||||
|  |        subtree, key, data, right subtree, and the height stored for the | ||||||
|  |        node (see [height] and [create]). *) | ||||||
|  |     type 'a t = | ||||||
|  |         Empty | ||||||
|  |       | Node of 'a t * key * 'a * 'a t * int | ||||||
|  |  | ||||||
|  |     (* Stored height of a tree; 0 for the empty tree. *) | ||||||
|  |     let height t = | ||||||
|  |       match t with | ||||||
|  |       | Empty -> 0 | ||||||
|  |       | Node(_, _, _, _, h) -> h | ||||||
|  |  | ||||||
|  |     (* Build a node from its parts; the recorded height is one more than the | ||||||
|  |        taller of the two subtrees.  No rebalancing is done here. *) | ||||||
|  |     let create l x d r = | ||||||
|  |       let hl = height l in | ||||||
|  |       let hr = height r in | ||||||
|  |       let taller = if hl >= hr then hl else hr in | ||||||
|  |       Node(l, x, d, r, taller + 1) | ||||||
|  |  | ||||||
|  |     (* One-binding tree: both subtrees empty, height 1. *) | ||||||
|  |     let singleton x d = Node(Empty, x, d, Empty, 1) | ||||||
|  |  | ||||||
|  |     (* [bal l x d r] builds a node from [l], the binding ([x], [d]) and [r], | ||||||
|  |        rearranging nodes when one side is more than 2 levels taller than the | ||||||
|  |        other.  Raises [Invalid_argument "Map.bal"] if the side found to be | ||||||
|  |        taller is [Empty].  Heights are read by matching on the nodes directly | ||||||
|  |        rather than through [height]. *) | ||||||
|  |     let bal l x d r = | ||||||
|  |       let hl = match l with Empty -> 0 | Node(_,_,_,_,h) -> h in | ||||||
|  |       let hr = match r with Empty -> 0 | Node(_,_,_,_,h) -> h in | ||||||
|  |       if hl > hr + 2 then begin | ||||||
|  |         (* Left side too tall. *) | ||||||
|  |         match l with | ||||||
|  |           Empty -> invalid_arg "Map.bal" | ||||||
|  |         | Node(ll, lv, ld, lr, _) -> | ||||||
|  |             if height ll >= height lr then | ||||||
|  |               (* [l]'s root becomes the new root. *) | ||||||
|  |               create ll lv ld (create lr x d r) | ||||||
|  |             else begin | ||||||
|  |               (* The root of [lr] becomes the new root. *) | ||||||
|  |               match lr with | ||||||
|  |                 Empty -> invalid_arg "Map.bal" | ||||||
|  |               | Node(lrl, lrv, lrd, lrr, _)-> | ||||||
|  |                   create (create ll lv ld lrl) lrv lrd (create lrr x d r) | ||||||
|  |             end | ||||||
|  |       end else if hr > hl + 2 then begin | ||||||
|  |         (* Right side too tall: mirror image of the case above. *) | ||||||
|  |         match r with | ||||||
|  |           Empty -> invalid_arg "Map.bal" | ||||||
|  |         | Node(rl, rv, rd, rr, _) -> | ||||||
|  |             if height rr >= height rl then | ||||||
|  |               create (create l x d rl) rv rd rr | ||||||
|  |             else begin | ||||||
|  |               match rl with | ||||||
|  |                 Empty -> invalid_arg "Map.bal" | ||||||
|  |               | Node(rll, rlv, rld, rlr, _) -> | ||||||
|  |                   create (create l x d rll) rlv rld (create rlr rv rd rr) | ||||||
|  |             end | ||||||
|  |       end else | ||||||
|  |         (* Heights differ by at most 2: same result as [create l x d r]. *) | ||||||
|  |         Node(l, x, d, r, (if hl >= hr then hl + 1 else hr + 1)) | ||||||
|  |  | ||||||
|  |     (* The map with no bindings. *) | ||||||
|  |     let empty = Empty | ||||||
|  |  | ||||||
|  |     (* True exactly for the [Empty] tree. *) | ||||||
|  |     let is_empty t = | ||||||
|  |       match t with | ||||||
|  |       | Empty -> true | ||||||
|  |       | _ -> false | ||||||
|  |  | ||||||
|  |     (* [add x data t] inserts the binding, replacing the key and data of a | ||||||
|  |        node whose key compares equal to [x]; the path walked is rebalanced | ||||||
|  |        with [bal]. *) | ||||||
|  |     let rec add x data t = | ||||||
|  |       match t with | ||||||
|  |       | Empty -> Node(Empty, x, data, Empty, 1) | ||||||
|  |       | Node(l, v, d, r, h) -> | ||||||
|  |           let c = Ord.compare x v in | ||||||
|  |           if c < 0 then bal (add x data l) v d r | ||||||
|  |           else if c > 0 then bal l v d (add x data r) | ||||||
|  |           else Node(l, x, data, r, h) | ||||||
|  |  | ||||||
|  |     (* Data bound to [x]; raises [Not_found] when the search reaches [Empty]. *) | ||||||
|  |     let rec find x t = | ||||||
|  |       match t with | ||||||
|  |       | Empty -> raise Not_found | ||||||
|  |       | Node(l, v, d, r, _) -> | ||||||
|  |           let c = Ord.compare x v in | ||||||
|  |           if c = 0 then d | ||||||
|  |           else if c < 0 then find x l | ||||||
|  |           else find x r | ||||||
|  |  | ||||||
|  |     (* Whether some node's key compares equal to [x]. *) | ||||||
|  |     let rec mem x t = | ||||||
|  |       match t with | ||||||
|  |       | Empty -> false | ||||||
|  |       | Node(l, v, _, r, _) -> | ||||||
|  |           let c = Ord.compare x v in | ||||||
|  |           if c = 0 then true | ||||||
|  |           else mem x (if c < 0 then l else r) | ||||||
|  |  | ||||||
|  |     (* Binding of the leftmost node; raises [Not_found] on [Empty]. *) | ||||||
|  |     let rec min_binding t = | ||||||
|  |       match t with | ||||||
|  |       | Empty -> raise Not_found | ||||||
|  |       | Node(Empty, x, d, _, _) -> (x, d) | ||||||
|  |       | Node(l, _, _, _, _) -> min_binding l | ||||||
|  |  | ||||||
|  |     (* Binding of the rightmost node; raises [Not_found] on [Empty]. *) | ||||||
|  |     let rec max_binding t = | ||||||
|  |       match t with | ||||||
|  |       | Empty -> raise Not_found | ||||||
|  |       | Node(_, x, d, Empty, _) -> (x, d) | ||||||
|  |       | Node(_, _, _, r, _) -> max_binding r | ||||||
|  |  | ||||||
|  |     (* Tree without its leftmost node, rebalanced on the way back up. | ||||||
|  |        Raises [Invalid_argument "Map.remove_min_elt"] on [Empty]. *) | ||||||
|  |     let rec remove_min_binding t = | ||||||
|  |       match t with | ||||||
|  |       | Empty -> invalid_arg "Map.remove_min_elt" | ||||||
|  |       | Node(Empty, _, _, r, _) -> r | ||||||
|  |       | Node(l, x, d, r, _) -> bal (remove_min_binding l) x d r | ||||||
|  |  | ||||||
|  |     (* Glue two trees with [bal], using the leftmost binding of [t2] as the | ||||||
|  |        new root.  Used by [remove]; a later definition in this module reuses | ||||||
|  |        the name [merge]. *) | ||||||
|  |     let merge t1 t2 = | ||||||
|  |       match t1, t2 with | ||||||
|  |       | Empty, t | t, Empty -> t | ||||||
|  |       | _, _ -> | ||||||
|  |           let (x, d) = min_binding t2 in | ||||||
|  |           let rest = remove_min_binding t2 in | ||||||
|  |           bal t1 x d rest | ||||||
|  |  | ||||||
|  |     (* [remove x t] drops the node whose key compares equal to [x], if any, | ||||||
|  |        rebuilding the path walked with [bal]. *) | ||||||
|  |     let rec remove x t = | ||||||
|  |       match t with | ||||||
|  |       | Empty -> Empty | ||||||
|  |       | Node(l, v, d, r, _) -> | ||||||
|  |           let c = Ord.compare x v in | ||||||
|  |           if c < 0 then bal (remove x l) v d r | ||||||
|  |           else if c > 0 then bal l v d (remove x r) | ||||||
|  |           else merge l r | ||||||
|  |  | ||||||
|  |     (* Apply [f] to every binding: left subtree, then the node, then the | ||||||
|  |        right subtree. *) | ||||||
|  |     let rec iter f t = | ||||||
|  |       match t with | ||||||
|  |       | Empty -> () | ||||||
|  |       | Node(l, v, d, r, _) -> | ||||||
|  |           iter f l; | ||||||
|  |           f v d; | ||||||
|  |           iter f r | ||||||
|  |  | ||||||
|  |     (* Same tree shape with [f] applied to each data item; [f] is called on | ||||||
|  |        the left subtree first, then the node, then the right subtree. *) | ||||||
|  |     let rec map f t = | ||||||
|  |       match t with | ||||||
|  |       | Empty -> Empty | ||||||
|  |       | Node(l, v, d, r, h) -> | ||||||
|  |           let new_l = map f l in | ||||||
|  |           let new_d = f d in | ||||||
|  |           let new_r = map f r in | ||||||
|  |           Node(new_l, v, new_d, new_r, h) | ||||||
|  |  | ||||||
|  |     (* Like [map], but [f] also receives the key. *) | ||||||
|  |     let rec mapi f t = | ||||||
|  |       match t with | ||||||
|  |       | Empty -> Empty | ||||||
|  |       | Node(l, v, d, r, h) -> | ||||||
|  |           let new_l = mapi f l in | ||||||
|  |           let new_d = f v d in | ||||||
|  |           let new_r = mapi f r in | ||||||
|  |           Node(new_l, v, new_d, new_r, h) | ||||||
|  |  | ||||||
|  |     (* Thread an accumulator through the bindings: left subtree, node, right | ||||||
|  |        subtree. *) | ||||||
|  |     let rec fold f m accu = | ||||||
|  |       match m with | ||||||
|  |       | Empty -> accu | ||||||
|  |       | Node(l, v, d, r, _) -> | ||||||
|  |           let after_left = fold f l accu in | ||||||
|  |           let after_node = f v d after_left in | ||||||
|  |           fold f r after_node | ||||||
|  |  | ||||||
|  |     (* [p] holds for every binding; the node is tested before its subtrees | ||||||
|  |        and evaluation stops at the first failure. *) | ||||||
|  |     let rec for_all p t = | ||||||
|  |       match t with | ||||||
|  |       | Empty -> true | ||||||
|  |       | Node(l, v, d, r, _) -> | ||||||
|  |           if not (p v d) then false | ||||||
|  |           else if not (for_all p l) then false | ||||||
|  |           else for_all p r | ||||||
|  |  | ||||||
|  |     (* [p] holds for some binding; the node is tested before its subtrees | ||||||
|  |        and evaluation stops at the first success. *) | ||||||
|  |     let rec exists p t = | ||||||
|  |       match t with | ||||||
|  |       | Empty -> false | ||||||
|  |       | Node(l, v, d, r, _) -> | ||||||
|  |           if p v d then true | ||||||
|  |           else if exists p l then true | ||||||
|  |           else exists p r | ||||||
|  |  | ||||||
|  |     (* Beware: those two functions assume that the added k is *strictly* | ||||||
|  |        smaller (or bigger) than all the present keys in the tree; it | ||||||
|  |        does not test for equality with the current min (or max) key. | ||||||
|  |  | ||||||
|  |        Indeed, they are only used during the "join" operation which | ||||||
|  |        respects this precondition. | ||||||
|  |     *) | ||||||
|  |  | ||||||
|  |     (* Insert ([k], [v]) at the far left of the tree, without comparing keys. *) | ||||||
|  |     let rec add_min_binding k v t = | ||||||
|  |       match t with | ||||||
|  |       | Empty -> singleton k v | ||||||
|  |       | Node (l, x, d, r, _) -> bal (add_min_binding k v l) x d r | ||||||
|  |  | ||||||
|  |     (* Insert ([k], [v]) at the far right of the tree, without comparing keys. *) | ||||||
|  |     let rec add_max_binding k v t = | ||||||
|  |       match t with | ||||||
|  |       | Empty -> singleton k v | ||||||
|  |       | Node (l, x, d, r, _) -> bal l x d (add_max_binding k v r) | ||||||
|  |  | ||||||
|  |     (* Same as create and bal, but no assumptions are made on the | ||||||
|  |        relative heights of l and r. *) | ||||||
|  |  | ||||||
|  |     (* [join l v d r] assembles [l], the binding ([v], [d]) and [r]: it | ||||||
|  |        descends into the taller side until the stored heights differ by at | ||||||
|  |        most 2, then uses [create]. *) | ||||||
|  |     let rec join l v d r = | ||||||
|  |       match l, r with | ||||||
|  |       | Empty, _ -> add_min_binding v d r | ||||||
|  |       | _, Empty -> add_max_binding v d l | ||||||
|  |       | Node(ll, lv, ld, lr, lh), Node(rl, rv, rd, rr, rh) -> | ||||||
|  |           if lh > rh + 2 then bal ll lv ld (join lr v d r) | ||||||
|  |           else if rh > lh + 2 then bal (join l v d rl) rv rd rr | ||||||
|  |           else create l v d r | ||||||
|  |  | ||||||
|  |     (* Merge two trees l and r into one. | ||||||
|  |        All elements of l must precede the elements of r. | ||||||
|  |        No assumption on the heights of l and r. *) | ||||||
|  |  | ||||||
|  |     (* Like the two-tree [merge] above, but assembled with [join] instead of | ||||||
|  |        [bal]. *) | ||||||
|  |     let concat t1 t2 = | ||||||
|  |       match t1, t2 with | ||||||
|  |       | Empty, t | t, Empty -> t | ||||||
|  |       | _, _ -> | ||||||
|  |           let (x, d) = min_binding t2 in | ||||||
|  |           let rest = remove_min_binding t2 in | ||||||
|  |           join t1 x d rest | ||||||
|  |  | ||||||
|  |     (* Keep the binding for [v] when [d] carries data, otherwise drop it and | ||||||
|  |        just concatenate the two trees. *) | ||||||
|  |     let concat_or_join t1 v d t2 = | ||||||
|  |       match d with | ||||||
|  |       | None -> concat t1 t2 | ||||||
|  |       | Some data -> join t1 v data t2 | ||||||
|  |  | ||||||
|  |     (* [split x t] returns the part of [t] to the left of [x], the data bound | ||||||
|  |        to [x] if present, and the part to the right of [x]. *) | ||||||
|  |     let rec split x t = | ||||||
|  |       match t with | ||||||
|  |       | Empty -> (Empty, None, Empty) | ||||||
|  |       | Node(l, v, d, r, _) -> | ||||||
|  |           let c = Ord.compare x v in | ||||||
|  |           if c = 0 then (l, Some d, r) | ||||||
|  |           else if c < 0 then begin | ||||||
|  |             let (ll, pres, rl) = split x l in | ||||||
|  |             (ll, pres, join rl v d r) | ||||||
|  |           end else begin | ||||||
|  |             let (lr, pres, rr) = split x r in | ||||||
|  |             (join l v d lr, pres, rr) | ||||||
|  |           end | ||||||
|  |  | ||||||
|  |     (* [merge f s1 s2] combines two maps key by key; [f] receives the key and | ||||||
|  |        the optional data from each side, and a [None] result drops the key | ||||||
|  |        (via [concat_or_join]).  This definition hides the two-tree [merge] | ||||||
|  |        defined earlier in the module for everything that follows. *) | ||||||
|  |     let rec merge f s1 s2 = | ||||||
|  |       match (s1, s2) with | ||||||
|  |         (Empty, Empty) -> Empty | ||||||
|  |       (* Pivot on the root of [s1] when it is at least as tall as [s2]... *) | ||||||
|  |       | (Node (l1, v1, d1, r1, h1), _) when h1 >= height s2 -> | ||||||
|  |           let (l2, d2, r2) = split v1 s2 in | ||||||
|  |           concat_or_join (merge f l1 l2) v1 (f v1 (Some d1) d2) (merge f r1 r2) | ||||||
|  |       (* ...otherwise pivot on the root of [s2]. *) | ||||||
|  |       | (_, Node (l2, v2, d2, r2, h2)) -> | ||||||
|  |           let (l1, d1, r1) = split v2 s1 in | ||||||
|  |           concat_or_join (merge f l1 l2) v2 (f v2 d1 (Some d2)) (merge f r1 r2) | ||||||
|  |       | _ -> | ||||||
|  |           assert false | ||||||
|  |  | ||||||
|  |     (* Bindings satisfying [p].  [p] is called on the left subtree, then the | ||||||
|  |        node, then the right subtree, so the calls happen left to right. *) | ||||||
|  |     let rec filter p t = | ||||||
|  |       match t with | ||||||
|  |       | Empty -> Empty | ||||||
|  |       | Node(l, v, d, r, _) -> | ||||||
|  |           let kept_l = filter p l in | ||||||
|  |           let keep_node = p v d in | ||||||
|  |           let kept_r = filter p r in | ||||||
|  |           if keep_node then join kept_l v d kept_r | ||||||
|  |           else concat kept_l kept_r | ||||||
|  |  | ||||||
|  |     (* Pair of (bindings satisfying [p], bindings not satisfying [p]).  [p] | ||||||
|  |        is called on the left subtree, then the node, then the right subtree. *) | ||||||
|  |     let rec partition p t = | ||||||
|  |       match t with | ||||||
|  |       | Empty -> (Empty, Empty) | ||||||
|  |       | Node(l, v, d, r, _) -> | ||||||
|  |           let (l_yes, l_no) = partition p l in | ||||||
|  |           let node_ok = p v d in | ||||||
|  |           let (r_yes, r_no) = partition p r in | ||||||
|  |           if node_ok then (join l_yes v d r_yes, concat l_no r_no) | ||||||
|  |           else (concat l_yes r_yes, join l_no v d r_no) | ||||||
|  |  | ||||||
|  |     (* Pending work of an in-order walk: a binding, the right subtree still | ||||||
|  |        to be visited after it, and the rest of the enumeration. *) | ||||||
|  |     type 'a enumeration = | ||||||
|  |       | End | ||||||
|  |       | More of key * 'a * 'a t * 'a enumeration | ||||||
|  |  | ||||||
|  |     (* Push the left spine of [m] in front of the enumeration [e]. *) | ||||||
|  |     let rec cons_enum m e = | ||||||
|  |       match m with | ||||||
|  |       | Empty -> e | ||||||
|  |       | Node(l, v, d, r, _) -> | ||||||
|  |           let pending = More(v, d, r, e) in | ||||||
|  |           cons_enum l pending | ||||||
|  |  | ||||||
|  |     (* Compare two maps binding by binding in enumeration order: keys with | ||||||
|  |        [Ord.compare], then data with [cmp]; a map that runs out first is the | ||||||
|  |        smaller one. *) | ||||||
|  |     let compare cmp m1 m2 = | ||||||
|  |       let rec walk e1 e2 = | ||||||
|  |         match e1, e2 with | ||||||
|  |         | End, End -> 0 | ||||||
|  |         | End, _ -> -1 | ||||||
|  |         | _, End -> 1 | ||||||
|  |         | More(k1, x1, rest1, tl1), More(k2, x2, rest2, tl2) -> | ||||||
|  |             let by_key = Ord.compare k1 k2 in | ||||||
|  |             if by_key <> 0 then by_key | ||||||
|  |             else begin | ||||||
|  |               let by_data = cmp x1 x2 in | ||||||
|  |               if by_data <> 0 then by_data | ||||||
|  |               else walk (cons_enum rest1 tl1) (cons_enum rest2 tl2) | ||||||
|  |             end | ||||||
|  |       in | ||||||
|  |       walk (cons_enum m1 End) (cons_enum m2 End) | ||||||
|  |  | ||||||
|  |     (* Two maps are equal when their enumerations have the same length, keys | ||||||
|  |        compare equal pairwise, and [cmp] accepts each pair of data. *) | ||||||
|  |     let equal cmp m1 m2 = | ||||||
|  |       let rec walk e1 e2 = | ||||||
|  |         match e1, e2 with | ||||||
|  |         | End, End -> true | ||||||
|  |         | End, _ | _, End -> false | ||||||
|  |         | More(k1, x1, rest1, tl1), More(k2, x2, rest2, tl2) -> | ||||||
|  |             if Ord.compare k1 k2 <> 0 then false | ||||||
|  |             else if not (cmp x1 x2) then false | ||||||
|  |             else walk (cons_enum rest1 tl1) (cons_enum rest2 tl2) | ||||||
|  |       in | ||||||
|  |       walk (cons_enum m1 End) (cons_enum m2 End) | ||||||
|  |  | ||||||
|  |     (* Number of nodes in the tree. *) | ||||||
|  |     let rec cardinal t = | ||||||
|  |       match t with | ||||||
|  |       | Empty -> 0 | ||||||
|  |       | Node(l, _, _, r, _) -> cardinal l + 1 + cardinal r | ||||||
|  |  | ||||||
|  |     (* Prepend the bindings of a tree to [accu]: right subtree first, then | ||||||
|  |        the node, then the left subtree, so the list comes out left to right. *) | ||||||
|  |     let rec bindings_aux accu t = | ||||||
|  |       match t with | ||||||
|  |       | Empty -> accu | ||||||
|  |       | Node(l, v, d, r, _) -> | ||||||
|  |           let with_right = bindings_aux accu r in | ||||||
|  |           bindings_aux ((v, d) :: with_right) l | ||||||
|  |  | ||||||
|  |     (* All bindings of the map as a list. *) | ||||||
|  |     let bindings s = bindings_aux [] s | ||||||
|  |  | ||||||
|  |     (* [choose] is simply [min_binding]. *) | ||||||
|  |     let choose = min_binding | ||||||
|  |  | ||||||
|  | end | ||||||
							
								
								
									
										2503
									
								
								samples/OCaml/mirage.ml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2503
									
								
								samples/OCaml/mirage.ml
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										125
									
								
								samples/OCaml/reload.ml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										125
									
								
								samples/OCaml/reload.ml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,125 @@ | |||||||
|  | (***********************************************************************) | ||||||
|  | (*                                                                     *) | ||||||
|  | (*                                OCaml                                *) | ||||||
|  | (*                                                                     *) | ||||||
|  | (*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *) | ||||||
|  | (*                                                                     *) | ||||||
|  | (*  Copyright 2000 Institut National de Recherche en Informatique et   *) | ||||||
|  | (*  en Automatique.  All rights reserved.  This file is distributed    *) | ||||||
|  | (*  under the terms of the Q Public License version 1.0.               *) | ||||||
|  | (*                                                                     *) | ||||||
|  | (***********************************************************************) | ||||||
|  |  | ||||||
|  | open Cmm | ||||||
|  | open Arch | ||||||
|  | open Reg | ||||||
|  | open Mach | ||||||
|  |  | ||||||
|  | (* Reloading for the AMD64 *) | ||||||
|  |  | ||||||
|  | (* Summary of instruction set constraints: | ||||||
|  |    "S" means either stack or register, "R" means register only. | ||||||
|  |    Operation                    Res     Arg1    Arg2 | ||||||
|  |      Imove                      R       S | ||||||
|  |                              or S       R | ||||||
|  |      Iconst_int                 S if 32-bit signed, R otherwise | ||||||
|  |      Iconst_float               R | ||||||
|  |      Iconst_symbol (not PIC)    S | ||||||
|  |      Iconst_symbol (PIC)        R | ||||||
|  |      Icall_ind                          R | ||||||
|  |      Itailcall_ind                      R | ||||||
|  |      Iload                      R       R       R | ||||||
|  |      Istore                             R       R | ||||||
|  |      Iintop(Icomp)              R       R       S | ||||||
|  |                             or  S       S       R | ||||||
|  |      Iintop(Imul|Idiv|mod)      R       R       S | ||||||
|  |      Iintop(shift)              S       S       R | ||||||
|  |      Iintop(others)             R       R       S | ||||||
|  |                             or  S       S       R | ||||||
|  |      Iintop_imm(Iadd, n)/lea    R       R | ||||||
|  |      Iintop_imm(others)         S       S | ||||||
|  |      Inegf...Idivf              R       R       S | ||||||
|  |      Ifloatofint                R       S | ||||||
|  |      Iintoffloat                R       S | ||||||
|  |      Ispecific(Ilea)            R       R       R | ||||||
|  |      Ispecific(Ifloatarithmem)  R       R       R | ||||||
|  |  | ||||||
|  |    Conditional branches: | ||||||
|  |      Iinttest                           S       R | ||||||
|  |                                     or  R       S | ||||||
|  |      Ifloattest                         R       S    (or  S R if swapped test) | ||||||
|  |      other tests                        S | ||||||
|  | *) | ||||||
|  |  | ||||||
|  | (* True when the location of [r] is a [Stack] one. *) | ||||||
|  | let stackp r = | ||||||
|  |   match r.loc with | ||||||
|  |   | Stack _ -> true | ||||||
|  |   | _ -> false | ||||||
|  |  | ||||||
|  | (* Reload pass for this target: inherits the generic behaviour from | ||||||
|  |    [Reloadgen.reload_generic] and overrides the two methods below to state, | ||||||
|  |    per operation or test, which operands must be moved with [self#makereg]. | ||||||
|  |    Cases not listed fall back to the inherited implementation. *) | ||||||
|  | class reload = object (self) | ||||||
|  |  | ||||||
|  | inherit Reloadgen.reload_generic as super | ||||||
|  |  | ||||||
|  | (* Returns the (arguments, results) pair to use for [op]; either the arrays | ||||||
|  |    passed in, or copies with selected entries replaced via [self#makereg]. *) | ||||||
|  | method! reload_operation op arg res = | ||||||
|  |   match op with | ||||||
|  |   | Iintop(Iadd|Isub|Iand|Ior|Ixor|Icomp _|Icheckbound) -> | ||||||
|  |       (* One of the two arguments can reside in the stack, but not both *) | ||||||
|  |       if stackp arg.(0) && stackp arg.(1) | ||||||
|  |       then ([|arg.(0); self#makereg arg.(1)|], res) | ||||||
|  |       else (arg, res) | ||||||
|  |   | Iintop_imm(Iadd, _) when arg.(0).loc <> res.(0).loc -> | ||||||
|  |       (* This add will be turned into a lea; args and results must be | ||||||
|  |          in registers *) | ||||||
|  |       super#reload_operation op arg res | ||||||
|  |   | Iintop(Idiv | Imod | Ilsl | Ilsr | Iasr) | ||||||
|  |   | Iintop_imm(_, _) -> | ||||||
|  |       (* The argument(s) and results can be either in register or on stack *) | ||||||
|  |       (* Note: Idiv, Imod: arg(0) and res(0) already forced in regs | ||||||
|  |                Ilsl, Ilsr, Iasr: arg(1) already forced in regs *) | ||||||
|  |       (arg, res) | ||||||
|  |   | Iintop(Imul) | Iaddf | Isubf | Imulf | Idivf -> | ||||||
|  |       (* First argument (= result) must be in register, second arg | ||||||
|  |          can reside in the stack *) | ||||||
|  |       if stackp arg.(0) | ||||||
|  |       then (let r = self#makereg arg.(0) in ([|r; arg.(1)|], [|r|])) | ||||||
|  |       else (arg, res) | ||||||
|  |   | Ifloatofint | Iintoffloat -> | ||||||
|  |       (* Result must be in register, but argument can be on stack *) | ||||||
|  |       (arg, (if stackp res.(0) then [| self#makereg res.(0) |] else res)) | ||||||
|  |   | Iconst_int n -> | ||||||
|  |       (* Constants inside the signed 32-bit range are left where they are. *) | ||||||
|  |       if n <= 0x7FFFFFFFn && n >= -0x80000000n | ||||||
|  |       then (arg, res) | ||||||
|  |       else super#reload_operation op arg res | ||||||
|  |   | Iconst_symbol _ -> | ||||||
|  |       (* Either flag being set sends the symbol through the generic path. *) | ||||||
|  |       if !pic_code || !Clflags.dlcode | ||||||
|  |       then super#reload_operation op arg res | ||||||
|  |       else (arg, res) | ||||||
|  |   | _ -> (* Other operations: all args and results in registers *) | ||||||
|  |       super#reload_operation op arg res | ||||||
|  |  | ||||||
|  | (* Returns the argument array to use for the test [tst]. *) | ||||||
|  | method! reload_test tst arg = | ||||||
|  |   match tst with | ||||||
|  |     Iinttest cmp -> | ||||||
|  |       (* One of the two arguments can reside on stack *) | ||||||
|  |       if stackp arg.(0) && stackp arg.(1) | ||||||
|  |       then [| self#makereg arg.(0); arg.(1) |] | ||||||
|  |       else arg | ||||||
|  |   | Ifloattest((Clt|Cle), _) -> | ||||||
|  |       (* Cf. emit.mlp: we swap arguments in this case *) | ||||||
|  |       (* First argument can be on stack, second must be in register *) | ||||||
|  |       if stackp arg.(1) | ||||||
|  |       then [| arg.(0); self#makereg arg.(1) |] | ||||||
|  |       else arg | ||||||
|  |   | Ifloattest((Ceq|Cne|Cgt|Cge), _) -> | ||||||
|  |       (* Second argument can be on stack, first must be in register *) | ||||||
|  |       if stackp arg.(0) | ||||||
|  |       then [| self#makereg arg.(0); arg.(1) |] | ||||||
|  |       else arg | ||||||
|  |   | _ -> | ||||||
|  |       (* The argument(s) can be either in register or on stack *) | ||||||
|  |       arg | ||||||
|  |  | ||||||
|  | end | ||||||
|  |  | ||||||
|  | (* Run a fresh [reload] object over the function declaration [f]. *) | ||||||
|  | let fundecl f = | ||||||
|  |   let reloader = new reload in | ||||||
|  |   reloader#fundecl f | ||||||
							
								
								
									
										70
									
								
								samples/OCaml/sigset.ml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										70
									
								
								samples/OCaml/sigset.ml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,70 @@ | |||||||
|  | (* | ||||||
|  |  * Copyright (c) 2013 Jeremy Yallop. | ||||||
|  |  * | ||||||
|  |  * This file is distributed under the terms of the MIT License. | ||||||
|  |  * See the file LICENSE for details. | ||||||
|  |  *) | ||||||
|  |  | ||||||
|  | open PosixTypes | ||||||
|  | open Ctypes | ||||||
|  | open Foreign | ||||||
|  |  | ||||||
|  | (* A signal set is handled as a pointer to [sigset_t]. *) | ||||||
|  | type t = sigset_t ptr | ||||||
|  |  | ||||||
|  | (* Value-level counterpart of the type above, built with [ptr]. *) | ||||||
|  | let t = ptr sigset_t | ||||||
|  |  | ||||||
|  | (* This function initializes the signal set set to exclude all of the defined | ||||||
|  |    signals. It always returns 0.  *) | ||||||
|  | let sigemptyset = foreign "sigemptyset" (ptr sigset_t @-> returning int) | ||||||
|  |  | ||||||
|  | (* Allocate one [sigset_t], pass it to [sigemptyset] (result ignored) and | ||||||
|  |    return the pointer. *) | ||||||
|  | let empty () = | ||||||
|  |   let fresh = allocate_n ~count:1 sigset_t in | ||||||
|  |   ignore (sigemptyset fresh); | ||||||
|  |   fresh | ||||||
|  |  | ||||||
|  | (* This function initializes the signal set set to include all of the defined | ||||||
|  |    signals. Again, the return value is 0. *) | ||||||
|  | let sigfillset = foreign "sigfillset" (ptr sigset_t @-> returning int) | ||||||
|  |  | ||||||
|  | (* Allocate one [sigset_t], pass it to [sigfillset] (result ignored) and | ||||||
|  |    return the pointer. *) | ||||||
|  | let full () = | ||||||
|  |   let fresh = allocate_n ~count:1 sigset_t in | ||||||
|  |   ignore (sigfillset fresh); | ||||||
|  |   fresh | ||||||
|  |  | ||||||
|  | (* This function adds the signal signum to the signal set set. All sigaddset | ||||||
|  |    does is modify set; it does not block or unblock any signals. | ||||||
|  |  | ||||||
|  |    The return value is 0 on success and -1 on failure. The following errno | ||||||
|  |    error condition is defined for this function: | ||||||
|  |  | ||||||
|  |    EINVAL The signum argument doesn't specify a valid signal.  | ||||||
|  | *) | ||||||
|  | let sigaddset = | ||||||
|  |   foreign "sigaddset" ~check_errno:true | ||||||
|  |     (ptr sigset_t @-> int @-> returning int) | ||||||
|  |  | ||||||
|  | (* Call [sigaddset] on [set] and [signal], discarding the integer result. *) | ||||||
|  | let add set signal = | ||||||
|  |   let (_ : int) = sigaddset set signal in | ||||||
|  |   () | ||||||
|  |  | ||||||
|  | (* This function removes the signal signum from the signal set set. All | ||||||
|  |    sigdelset does is modify set; it does not block or unblock any signals. | ||||||
|  |  | ||||||
|  |    The return value and error conditions are the same as for | ||||||
|  |    sigaddset.  *) | ||||||
|  | let sigdelset = | ||||||
|  |   foreign "sigdelset" ~check_errno:true | ||||||
|  |     (ptr sigset_t @-> int @-> returning int) | ||||||
|  |  | ||||||
|  | (* Call [sigdelset] on [set] and [signal], discarding the integer result. *) | ||||||
|  | let del set signal = | ||||||
|  |   let (_ : int) = sigdelset set signal in | ||||||
|  |   () | ||||||
|  |  | ||||||
|  | (* The sigismember function tests whether the signal signum is a member of the | ||||||
|  |    signal set set. It returns 1 if the signal is in the set, 0 if not, and -1 if | ||||||
|  |    there is an error. | ||||||
|  |  | ||||||
|  |    The following errno error condition is defined for this function: | ||||||
|  |  | ||||||
|  |    EINVAL The signum argument doesn't specify a valid signal.  | ||||||
|  | *) | ||||||
|  | let sigismember = | ||||||
|  |   foreign "sigismember" ~check_errno:true | ||||||
|  |     (ptr sigset_t @-> int @-> returning int) | ||||||
|  |  | ||||||
|  | (* True when [sigismember] returns anything other than 0. *) | ||||||
|  | let mem set signal = | ||||||
|  |   let answer = sigismember set signal in | ||||||
|  |   not (answer = 0) | ||||||
							
								
								
									
										810
									
								
								samples/OCaml/uutf.ml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										810
									
								
								samples/OCaml/uutf.ml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,810 @@ | |||||||
|  | (*--------------------------------------------------------------------------- | ||||||
|  |    Copyright 2012 Daniel C. Bünzli. All rights reserved. | ||||||
|  |    Distributed under the BSD3 license, see license at the end of the file. | ||||||
|  |    %%NAME%% release %%VERSION%% | ||||||
|  |   ---------------------------------------------------------------------------*) | ||||||
|  |  | ||||||
|  | (* Buffer size constant (IO_BUFFER_SIZE 4.0.0). *) | ||||||
|  | let io_buffer_size = 65536 | ||||||
|  |  | ||||||
|  | (* Shorthand for [Format.fprintf]. *) | ||||||
|  | let pp = Format.fprintf | ||||||
|  |  | ||||||
|  | (* Raises [Invalid_argument] with a fixed message. *) | ||||||
|  | let invalid_encode () = | ||||||
|  |   let msg = "expected `Await encode" in | ||||||
|  |   invalid_arg msg | ||||||
|  |  | ||||||
|  | (* Raises [Invalid_argument] reporting the index [j] and length [l]. *) | ||||||
|  | let invalid_bounds j l = | ||||||
|  |   let msg = Printf.sprintf "invalid bounds (index %d, length %d)" j l in | ||||||
|  |   invalid_arg msg | ||||||
|  |  | ||||||
|  | (* Unsafe string byte manipulations. If you don't believe the author's | ||||||
|  |    invariants, replacing with safe versions makes everything safe in | ||||||
|  |    the module. He won't be upset. *) | ||||||
|  |  | ||||||
|  | let unsafe_chr = Char.unsafe_chr | ||||||
|  | let unsafe_blit = String.unsafe_blit | ||||||
|  | let unsafe_array_get = Array.unsafe_get | ||||||
|  |  | ||||||
|  | (* [unsafe_byte s j] is the code of the character at index [j] of [s], | ||||||
|  |    read without bounds checking. *) | ||||||
|  | let unsafe_byte s j = | ||||||
|  |   let c = String.unsafe_get s j in | ||||||
|  |   Char.code c | ||||||
|  |  | ||||||
|  | (* [unsafe_set_byte s j byte] stores [byte] as a character at index [j] | ||||||
|  |    of [s], without bounds or range checking. *) | ||||||
|  | let unsafe_set_byte s j byte = | ||||||
|  |   let c = Char.unsafe_chr byte in | ||||||
|  |   String.unsafe_set s j c | ||||||
|  |  | ||||||
|  | (* Unicode characters *) | ||||||
|  |  | ||||||
|  | type uchar = int | ||||||
|  |  | ||||||
|  | (* BOM. *) | ||||||
|  | let u_bom = 0xFEFF | ||||||
|  |  | ||||||
|  | (* Replacement character. *) | ||||||
|  | let u_rep = 0xFFFD | ||||||
|  |  | ||||||
|  | (* [is_uchar cp] is [true] iff [cp] lies in 0x0000-0x10FFFF and outside | ||||||
|  |    the range 0xD800-0xDFFF. *) | ||||||
|  | let is_uchar cp = | ||||||
|  |   let in_range = 0x0000 <= cp && cp <= 0x10FFFF in | ||||||
|  |   let in_gap = 0xD800 <= cp && cp <= 0xDFFF in | ||||||
|  |   in_range && not in_gap | ||||||
|  |  | ||||||
|  | (* [pp_cp ppf cp] prints [cp] on [ppf] as "U+" followed by hexadecimal | ||||||
|  |    digits (at least four for values up to 0xFFFF); values outside | ||||||
|  |    0..0x10FFFF are printed as "U+Invalid(...)". *) | ||||||
|  | let pp_cp ppf cp = | ||||||
|  |   if 0 <= cp && cp <= 0x10FFFF | ||||||
|  |   then begin | ||||||
|  |     if cp > 0xFFFF then pp ppf "U+%X" cp else pp ppf "U+%04X" cp | ||||||
|  |   end | ||||||
|  |   else pp ppf "U+Invalid(%X)" cp | ||||||
|  |  | ||||||
|  | (* NOT thread safe: goes through the shared [Format.str_formatter]. *) | ||||||
|  | let cp_to_string cp = | ||||||
|  |   let ppf = Format.str_formatter in | ||||||
|  |   pp ppf "%a" pp_cp cp; | ||||||
|  |   Format.flush_str_formatter () | ||||||
|  |  | ||||||
|  | (* Unicode encoding schemes *) | ||||||
|  |  | ||||||
|  | (* [encoding] lists the four UTF encoding schemes; [decoder_encoding] | ||||||
|  |    extends it with the two single-byte encodings `US_ASCII and | ||||||
|  |    `ISO_8859_1. *) | ||||||
|  | type encoding = [ `UTF_8 | `UTF_16 | `UTF_16BE | `UTF_16LE ] | ||||||
|  | type decoder_encoding = [ encoding | `US_ASCII | `ISO_8859_1 ] | ||||||
|  |  | ||||||
|  | (* [encoding_of_string s] maps an IANA charset name, compared after | ||||||
|  |    upper-casing [s], to [Some] encoding tag, or [None] if the name is | ||||||
|  |    not one of those listed below. *) | ||||||
|  | let encoding_of_string s = | ||||||
|  |   let name = String.uppercase s in | ||||||
|  |   match name with | ||||||
|  |   | "UTF-8" -> Some `UTF_8 | ||||||
|  |   | "UTF-16" -> Some `UTF_16 | ||||||
|  |   | "UTF-16BE" -> Some `UTF_16BE | ||||||
|  |   | "UTF-16LE" -> Some `UTF_16LE | ||||||
|  |   | "US-ASCII" | "ASCII" | "US" | "CSASCII" | ||||||
|  |   | "ANSI_X3.4-1968" | "ANSI_X3.4-1986" | ||||||
|  |   | "ISO-IR-6" | "ISO_646.IRV:1991" | "ISO646-US" | ||||||
|  |   | "IBM367" | "CP367" -> | ||||||
|  |       Some `US_ASCII | ||||||
|  |   | "ISO-8859-1" | "ISO_8859-1" | "ISO_8859-1:1987" | ||||||
|  |   | "ISO-IR-100" | "LATIN1" | "L1" | "CSISOLATIN1" | ||||||
|  |   | "IBM819" | "CP819" -> | ||||||
|  |       Some `ISO_8859_1 | ||||||
|  |   | _ -> None | ||||||
|  |  | ||||||
|  | (* [encoding_to_string e] is the name string for the encoding tag [e]. *) | ||||||
|  | let encoding_to_string e = match e with | ||||||
|  | | `UTF_8 -> "UTF-8" | ||||||
|  | | `UTF_16 -> "UTF-16" | ||||||
|  | | `UTF_16BE -> "UTF-16BE" | ||||||
|  | | `UTF_16LE -> "UTF-16LE" | ||||||
|  | | `US_ASCII -> "US-ASCII" | ||||||
|  | | `ISO_8859_1 -> "ISO-8859-1" | ||||||
|  |  | ||||||
|  | (* Base character decoders. They assume enough data. *) | ||||||
|  |  | ||||||
|  | (* [malformed s j l] wraps the [l] bytes of [s] starting at [j] in | ||||||
|  |    `Malformed. *) | ||||||
|  | let malformed s j l = | ||||||
|  |   let bytes = String.sub s j l in | ||||||
|  |   `Malformed bytes | ||||||
|  |  | ||||||
|  | (* Missing or half low surrogate at eoi: the result is `Malformed of the | ||||||
|  |    two bytes of [hi] (most significant byte first if [be], last | ||||||
|  |    otherwise) followed by the [l] bytes of [s] starting at [j]. *) | ||||||
|  | let malformed_pair be hi s j l = | ||||||
|  |   let msb_pos, lsb_pos = if be then (0, 1) else (1, 0) in | ||||||
|  |   let tail = String.sub s j l in | ||||||
|  |   let head = String.create 2 in | ||||||
|  |   unsafe_set_byte head msb_pos (hi lsr 8); | ||||||
|  |   unsafe_set_byte head lsb_pos (hi land 0xFF); | ||||||
|  |   `Malformed (head ^ tail) | ||||||
|  |  | ||||||
|  | (* [r_us_ascii s j] reads the byte at [j] in [s]: `Uchar of that byte | ||||||
|  |    if it is at most 127, otherwise `Malformed of that single byte. | ||||||
|  |    Expects [0 <= j && j < String.length s] (not checked). *) | ||||||
|  | let r_us_ascii s j = | ||||||
|  |   let byte = unsafe_byte s j in | ||||||
|  |   if byte > 127 then malformed s j 1 else `Uchar byte | ||||||
|  |  | ||||||
|  | (* [r_iso_8859_1 s j] is `Uchar of the byte at [j] in [s]. | ||||||
|  |    Expects [0 <= j && j < String.length s] (not checked). *) | ||||||
|  | let r_iso_8859_1 s j = | ||||||
|  |   let byte = unsafe_byte s j in | ||||||
|  |   `Uchar byte | ||||||
|  |  | ||||||
|  | (* uchar byte length according to first UTF-8 byte; 0 marks a byte that | ||||||
|  |    cannot start a sequence. The table has 256 entries: | ||||||
|  |    0x00-0x7F -> 1, 0x80-0xC1 -> 0, 0xC2-0xDF -> 2, 0xE0-0xEF -> 3, | ||||||
|  |    0xF0-0xF4 -> 4, 0xF5-0xFF -> 0. *) | ||||||
|  | let utf_8_len = | ||||||
|  |   Array.init 256 (fun b -> | ||||||
|  |     if b <= 0x7F then 1 else | ||||||
|  |     if b <= 0xC1 then 0 else | ||||||
|  |     if b <= 0xDF then 2 else | ||||||
|  |     if b <= 0xEF then 3 else | ||||||
|  |     if b <= 0xF4 then 4 else | ||||||
|  |     0) | ||||||
|  |  | ||||||
|  | (* [r_utf_8 s j l] decodes the [l] bytes long UTF-8 sequence starting at | ||||||
|  |    index [j] of [s]. The result is `Uchar of the decoded value, or | ||||||
|  |    [malformed s j l] if a continuation byte is invalid. [l] must be | ||||||
|  |    between 1 and 4; any other value hits [assert false]. Bounds are not | ||||||
|  |    checked (see the commented assertion). *) | ||||||
|  | let r_utf_8 s j l = | ||||||
|  |   (* assert (0 <= j && 0 <= l && j + l <= String.length s); *) | ||||||
|  |   match l with | ||||||
|  |   | 1 -> `Uchar (unsafe_byte s j) | ||||||
|  |   | 2 -> | ||||||
|  |       let b0 = unsafe_byte s j in let b1 = unsafe_byte s (j + 1) in | ||||||
|  |       (* a continuation byte must have its two top bits equal to 10. *) | ||||||
|  |       if b1 lsr 6 != 0b10 then malformed s j l else | ||||||
|  |       `Uchar (((b0 land 0x1F) lsl 6) lor (b1 land 0x3F)) | ||||||
|  |   | 3 -> | ||||||
|  |       let b0 = unsafe_byte s j in let b1 = unsafe_byte s (j + 1) in | ||||||
|  |       let b2 = unsafe_byte s (j + 2) in | ||||||
|  |       (* [c] is computed eagerly but only returned once validated. *) | ||||||
|  |       let c = `Uchar (((b0 land 0x0F) lsl 12) lor | ||||||
|  |                       ((b1 land 0x3F) lsl 6) lor | ||||||
|  |           (b2 land 0x3F)) | ||||||
|  |       in | ||||||
|  |       if b2 lsr 6 != 0b10 then malformed s j l else | ||||||
|  |       (* the allowed range of the second byte depends on the first: | ||||||
|  |          0xE0 restricts it to 0xA0-0xBF and 0xED to 0x80-0x9F. *) | ||||||
|  |       begin match b0 with | ||||||
|  |       | 0xE0 -> if b1 < 0xA0 || 0xBF < b1 then malformed s j l else c | ||||||
|  |       | 0xED -> if b1 < 0x80 || 0x9F < b1 then malformed s j l else c | ||||||
|  |       | _ -> if b1 lsr 6 != 0b10 then malformed s j l else c | ||||||
|  |       end | ||||||
|  |   | 4 -> | ||||||
|  |       let b0 = unsafe_byte s j in let b1 = unsafe_byte s (j + 1) in | ||||||
|  |       let b2 = unsafe_byte s (j + 2) in let b3 = unsafe_byte s (j + 3) in | ||||||
|  |       let c = `Uchar (((b0 land 0x07) lsl 18) lor | ||||||
|  |                       ((b1 land 0x3F) lsl 12) lor | ||||||
|  |           ((b2 land 0x3F) lsl 6) lor | ||||||
|  |                       (b3 land 0x3F)) | ||||||
|  |       in | ||||||
|  |       if b3 lsr 6 != 0b10 || b2 lsr 6 != 0b10 then malformed s j l else | ||||||
|  |       (* same idea as above: 0xF0 restricts the second byte to 0x90-0xBF | ||||||
|  |          and 0xF4 to 0x80-0x8F. *) | ||||||
|  |       begin match b0 with | ||||||
|  |       | 0xF0 -> if b1 < 0x90 || 0xBF < b1 then malformed s j l else c | ||||||
|  |       | 0xF4 -> if b1 < 0x80 || 0x8F < b1 then malformed s j l else c | ||||||
|  |       | _ -> if b1 lsr 6 != 0b10 then malformed s j l else c | ||||||
|  |       end | ||||||
|  |   | _ -> assert false | ||||||
|  |  | ||||||
|  | (* [r_utf_16 s j0 j1] reads a 16-bit unit whose most significant byte is | ||||||
|  |    at [j0] and least significant byte at [j1] in [s]. May return a high | ||||||
|  |    surrogate: 0xD800-0xDBFF yields `Hi, 0xDC00-0xDFFF yields `Malformed | ||||||
|  |    of the two bytes, anything else yields `Uchar. Indices are not | ||||||
|  |    bounds checked. *) | ||||||
|  | let r_utf_16 s j0 j1 = | ||||||
|  |   let msb = unsafe_byte s j0 in | ||||||
|  |   let lsb = unsafe_byte s j1 in | ||||||
|  |   let u = (msb lsl 8) lor lsb in | ||||||
|  |   if 0xD800 <= u && u <= 0xDBFF then `Hi u else | ||||||
|  |   if 0xDC00 <= u && u <= 0xDFFF then malformed s (min j0 j1) 2 else | ||||||
|  |   `Uchar u | ||||||
|  |  | ||||||
|  | (* [r_utf_16_lo hi s j0 j1] combines [hi] with the 16-bit unit whose most | ||||||
|  |    significant byte is at [j0] and least significant byte at [j1]. If | ||||||
|  |    that unit is in 0xDC00-0xDFFF the result is the combined `Uchar, | ||||||
|  |    otherwise it is [malformed_pair] of [hi] and the two bytes, big | ||||||
|  |    endian when [j0 < j1]. Indices are not bounds checked. *) | ||||||
|  | let r_utf_16_lo hi s j0 j1 = | ||||||
|  |   let msb = unsafe_byte s j0 in | ||||||
|  |   let lsb = unsafe_byte s j1 in | ||||||
|  |   let lo = (msb lsl 8) lor lsb in | ||||||
|  |   if 0xDC00 <= lo && lo <= 0xDFFF | ||||||
|  |   then `Uchar (0x10000 + (((hi land 0x3FF) lsl 10) lor (lo land 0x3FF))) | ||||||
|  |   else | ||||||
|  |     let be = j0 < j1 in | ||||||
|  |     malformed_pair be hi s (min j0 j1) 2 | ||||||
|  |  | ||||||
|  | (* [r_encoding s j l] guesses an encoding from at most the first three | ||||||
|  |    of the [l] bytes of [s] starting at [j]. The result pairs the guessed | ||||||
|  |    encoding tag with what was recognised: `BOM, `ASCII p (a byte [p] | ||||||
|  |    next to a zero byte), `Decode (no hint, just decode) or `End (no | ||||||
|  |    bytes at all). Patterns are tried from top to bottom, so the order | ||||||
|  |    of the cases matters. *) | ||||||
|  | let r_encoding s j l =                  (* guess encoding with max. 3 bytes. *) | ||||||
|  |   (* assert (0 <= j && 0 <= l && j + l <= String.length s) *) | ||||||
|  |   (* [some i] is the [i]th available byte, [None] past the [l] bytes. *) | ||||||
|  |   let some i = if i < l then Some (unsafe_byte s (j + i)) else None in | ||||||
|  |   match (some 0), (some 1), (some 2) with | ||||||
|  |   | Some 0xEF, Some 0xBB, Some 0xBF                 -> `UTF_8 `BOM | ||||||
|  |   | Some 0xFE, Some 0xFF, _                         -> `UTF_16BE `BOM | ||||||
|  |   | Some 0xFF, Some 0xFE, _                         -> `UTF_16LE `BOM | ||||||
|  |   | Some 0x00, Some    p, _              when p > 0 -> `UTF_16BE (`ASCII p) | ||||||
|  |   | Some    p, Some 0x00, _              when p > 0 -> `UTF_16LE (`ASCII p) | ||||||
|  |   | Some    u,         _, _ when utf_8_len.(u) <> 0 -> `UTF_8 `Decode | ||||||
|  |   | Some    _, Some    _, _                         -> `UTF_16BE `Decode | ||||||
|  |   | Some    _, None     , None                      -> `UTF_8 `Decode | ||||||
|  |   | None     , None     , None                      -> `UTF_8 `End | ||||||
|  |   (* the remaining shapes cannot be produced by [some]: a byte is never | ||||||
|  |      available after a missing one. *) | ||||||
|  |   | None     , Some    _, _                         -> assert false | ||||||
|  |   | Some    _, None     , Some _                    -> assert false | ||||||
|  |   | None     , None     , Some _                    -> assert false | ||||||
|  |  | ||||||
|  | (* Decode *) | ||||||
|  |  | ||||||
|  | (* [src] is the kind of input source, [nln] a newline normalization mode | ||||||
|  |    carrying the character to substitute, and [decode] the result of one | ||||||
|  |    decoding step. *) | ||||||
|  | type src = [ `Channel of in_channel | `String of string | `Manual ] | ||||||
|  | type nln = [ `ASCII of uchar | `NLF of uchar | `Readline of uchar ] | ||||||
|  | type decode = [ `Await | `End | `Malformed of string | `Uchar of uchar] | ||||||
|  |  | ||||||
|  | (* [pp_decode ppf v] prints the decode result [v] on [ppf]. The bytes of | ||||||
|  |    a `Malformed value are printed as two-digit hexadecimal numbers | ||||||
|  |    separated by single spaces. *) | ||||||
|  | let pp_decode ppf v = match v with | ||||||
|  | | `Await -> pp ppf "`Await" | ||||||
|  | | `End -> pp ppf "`End" | ||||||
|  | | `Uchar u -> pp ppf "@[`Uchar %a@]" pp_cp u | ||||||
|  | | `Malformed bs -> | ||||||
|  |     let pp_byte i c = | ||||||
|  |       if i = 0 then pp ppf "%02X" (Char.code c) | ||||||
|  |       else pp ppf " %02X" (Char.code c) | ||||||
|  |     in | ||||||
|  |     pp ppf "@[`Malformed ("; | ||||||
|  |     String.iteri pp_byte bs; | ||||||
|  |     pp ppf ")@]" | ||||||
|  |  | ||||||
|  | (* Decoder state. Immutable fields describe the configuration; mutable | ||||||
|  |    fields hold the current input chunk, the temporary buffer bookkeeping, | ||||||
|  |    the position counters, and the two functions that drive decoding: | ||||||
|  |    the post-processor [pp] and the continuation [k]. *) | ||||||
|  | type decoder = | ||||||
|  |   { src : src;                                              (* input source. *) | ||||||
|  |     mutable encoding : decoder_encoding;                (* decoded encoding. *) | ||||||
|  |     nln : nln option;                     (* newline normalization (if any). *) | ||||||
|  |     nl : int;                            (* newline normalization character. *) | ||||||
|  |     mutable i : string;                              (* current input chunk. *) | ||||||
|  |     mutable i_pos : int;                          (* input current position. *) | ||||||
|  |     mutable i_max : int;                          (* input maximal position. *) | ||||||
|  |     t : string;        (* four bytes temporary buffer for overlapping reads. *) | ||||||
|  |     mutable t_len : int;                      (* current byte length of [t]. *) | ||||||
|  |     mutable t_need : int;                  (* number of bytes needed in [t]. *) | ||||||
|  |     mutable removed_bom : bool;     (* [true] if an initial BOM was removed. *) | ||||||
|  |     mutable last_cr : bool;                   (* [true] if last char was CR. *) | ||||||
|  |     mutable line : int;                                      (* line number. *) | ||||||
|  |     mutable col : int;                                     (* column number. *) | ||||||
|  |     mutable byte_count : int;                                 (* byte count. *) | ||||||
|  |     mutable count : int;                                      (* char count. *) | ||||||
|  |     mutable pp :        (* decoder post-processor for BOM, position and nln. *) | ||||||
|  |       decoder -> [ `Malformed of string | `Uchar of uchar ] -> decode; | ||||||
|  |     mutable k : decoder -> decode }                 (* decoder continuation. *) | ||||||
|  |  | ||||||
|  | (* On decodes that overlap two (or more) [d.i] buffers, we use [t_fill] to copy | ||||||
|  |    the input data to [d.t] and decode from there. If the [d.i] buffers are not | ||||||
|  |    too small this is faster than continuation based byte per byte writes. | ||||||
|  |  | ||||||
|  |    End of input (eoi) is signalled by [d.i_pos = 0] and [d.i_max = min_int] | ||||||
|  |    which implies that [i_rem d < 0] is [true]. *) | ||||||
|  |  | ||||||
|  | (* Remaining bytes to read in [d.i]. *) | ||||||
|  | let i_rem d = 1 + d.i_max - d.i_pos | ||||||
|  |  | ||||||
|  | (* Set eoi in [d]. *) | ||||||
|  | let eoi d = | ||||||
|  |   d.i <- ""; | ||||||
|  |   d.i_pos <- 0; | ||||||
|  |   d.i_max <- min_int | ||||||
|  |  | ||||||
|  | (* Set [d.i] with the [l] bytes of [s] starting at [j]. Invalid bounds | ||||||
|  |    raise via [invalid_bounds]; a zero length sets eoi. *) | ||||||
|  | let src d s j l = | ||||||
|  |   let in_bounds = j >= 0 && l >= 0 && j + l <= String.length s in | ||||||
|  |   if not in_bounds then invalid_bounds j l | ||||||
|  |   else if l = 0 then eoi d | ||||||
|  |   else begin | ||||||
|  |     d.i <- s; | ||||||
|  |     d.i_pos <- j; | ||||||
|  |     d.i_max <- j + l - 1 | ||||||
|  |   end | ||||||
|  |  | ||||||
|  | (* Get new input in [d.i] and [k]ontinue. A manual source stores [k] in | ||||||
|  |    [d.k] and returns `Await; a string source has nothing more to give | ||||||
|  |    and sets eoi; a channel source reads into the current buffer. *) | ||||||
|  | let refill k d = | ||||||
|  |   match d.src with | ||||||
|  |   | `String _ -> eoi d; k d | ||||||
|  |   | `Manual -> d.k <- k; `Await | ||||||
|  |   | `Channel ic -> | ||||||
|  |       let buf = d.i in | ||||||
|  |       let rc = input ic buf 0 (String.length buf) in | ||||||
|  |       src d buf 0 rc; | ||||||
|  |       k d | ||||||
|  |  | ||||||
|  | (* [t_need d need] empties the temporary buffer bookkeeping and records | ||||||
|  |    that [need] bytes are wanted in [d.t]. *) | ||||||
|  | let t_need d need = d.t_len <- 0; d.t_need <- need | ||||||
|  | let rec t_fill k d =      (* get [d.t_need] bytes (or less if eoi) in [d.t]. *) | ||||||
|  |   (* [blit d l] moves [l] bytes from the input chunk to the end of [d.t] | ||||||
|  |      and advances both positions. *) | ||||||
|  |   let blit d l = | ||||||
|  |     unsafe_blit d.i d.i_pos d.t d.t_len (* write pos. *) l; | ||||||
|  |     d.i_pos <- d.i_pos + l; d.t_len <- d.t_len + l; | ||||||
|  |   in | ||||||
|  |   let rem = i_rem d in | ||||||
|  |   if rem < 0 (* eoi *) then k d else | ||||||
|  |   let need = d.t_need - d.t_len in | ||||||
|  |   (* not enough input: take what is there, refill and come back here. *) | ||||||
|  |   if rem < need then (blit d rem; refill (t_fill k) d) else (blit d need; k d) | ||||||
|  |  | ||||||
|  | (* Return post-processed [v]: install [k] as the next continuation, add | ||||||
|  |    [byte_count] to the decoder's byte count, then run [d.pp] on [v]. *) | ||||||
|  | let ret k v byte_count d = | ||||||
|  |   d.k <- k; | ||||||
|  |   d.byte_count <- byte_count + d.byte_count; | ||||||
|  |   d.pp d v | ||||||
|  |  | ||||||
|  | (* Decoders. *) | ||||||
|  |  | ||||||
|  | (* US-ASCII decoder: `End at eoi, refill when the chunk is exhausted, | ||||||
|  |    otherwise consume one byte and return it through [ret]. *) | ||||||
|  | let rec decode_us_ascii d = | ||||||
|  |   let rem = i_rem d in | ||||||
|  |   if rem < 0 then `End else | ||||||
|  |   if rem = 0 then refill decode_us_ascii d else | ||||||
|  |   let j = d.i_pos in | ||||||
|  |   d.i_pos <- j + 1; | ||||||
|  |   ret decode_us_ascii (r_us_ascii d.i j) 1 d | ||||||
|  |  | ||||||
|  | (* ISO-8859-1 decoder, same structure as the US-ASCII one. *) | ||||||
|  | let rec decode_iso_8859_1 d = | ||||||
|  |   let rem = i_rem d in | ||||||
|  |   if rem < 0 then `End else | ||||||
|  |   if rem = 0 then refill decode_iso_8859_1 d else | ||||||
|  |   let j = d.i_pos in | ||||||
|  |   d.i_pos <- j + 1; | ||||||
|  |   ret decode_iso_8859_1 (r_iso_8859_1 d.i j) 1 d | ||||||
|  |  | ||||||
|  | (* UTF-8 decoder *) | ||||||
|  |  | ||||||
|  | (* [t_decode_utf_8] decodes a sequence gathered in [d.t]; if fewer bytes | ||||||
|  |    than needed were obtained the bytes present are reported as | ||||||
|  |    malformed. *) | ||||||
|  | let rec t_decode_utf_8 d =                             (* decode from [d.t]. *) | ||||||
|  |   if d.t_len < d.t_need | ||||||
|  |   then ret decode_utf_8 (malformed d.t 0 d.t_len) d.t_len d | ||||||
|  |   else ret decode_utf_8 (r_utf_8 d.t 0 d.t_len) d.t_len d | ||||||
|  |  | ||||||
|  | (* [decode_utf_8] decodes the next sequence from the input chunk. The | ||||||
|  |    first byte gives the sequence length through [utf_8_len]; a sequence | ||||||
|  |    that does not fit in the chunk goes through [d.t], and a byte with | ||||||
|  |    length 0 is reported as a one byte malformed sequence. *) | ||||||
|  | and decode_utf_8 d = | ||||||
|  |   let rem = i_rem d in | ||||||
|  |   if rem <= 0 then (if rem < 0 then `End else refill decode_utf_8 d) else | ||||||
|  |   let need = unsafe_array_get utf_8_len (unsafe_byte d.i d.i_pos) in | ||||||
|  |   if rem < need then (t_need d need; t_fill t_decode_utf_8 d) else | ||||||
|  |   let j = d.i_pos in | ||||||
|  |   if need = 0 | ||||||
|  |   then (d.i_pos <- d.i_pos + 1; ret decode_utf_8 (malformed d.i j 1) 1 d) | ||||||
|  |   else (d.i_pos <- d.i_pos + need; ret decode_utf_8 (r_utf_8 d.i j need) need d) | ||||||
|  |  | ||||||
|  | (* UTF-16BE decoder *) | ||||||
|  |  | ||||||
|  | (* [t_decode_utf_16be_lo hi] finishes a surrogate pair whose low half | ||||||
|  |    was gathered in [d.t]; the reported byte count includes the two | ||||||
|  |    bytes of [hi]. *) | ||||||
|  | let rec t_decode_utf_16be_lo hi d =                    (* decode from [d.t]. *) | ||||||
|  |   let bcount = d.t_len + 2 (* hi count *) in | ||||||
|  |   if d.t_len < d.t_need | ||||||
|  |   then ret decode_utf_16be (malformed_pair true hi d.t 0 d.t_len) bcount d | ||||||
|  |   else ret decode_utf_16be (r_utf_16_lo hi d.t 0 1) bcount d | ||||||
|  |  | ||||||
|  | (* [t_decode_utf_16be] decodes a 16-bit unit gathered in [d.t]. *) | ||||||
|  | and t_decode_utf_16be d =                              (* decode from [d.t]. *) | ||||||
|  |   if d.t_len < d.t_need | ||||||
|  |   then ret decode_utf_16be (malformed d.t 0 d.t_len) d.t_len d | ||||||
|  |   else decode_utf_16be_lo (r_utf_16 d.t 0 1) d | ||||||
|  |  | ||||||
|  | (* [decode_utf_16be_lo v] returns [v] directly unless it is a high | ||||||
|  |    surrogate, in which case the next two bytes are read as its low | ||||||
|  |    surrogate (through [d.t] if fewer than two bytes remain). *) | ||||||
|  | and decode_utf_16be_lo v d = match v with | ||||||
|  | | `Uchar _ | `Malformed _ as v -> ret decode_utf_16be v 2 d | ||||||
|  | | `Hi hi -> | ||||||
|  |     let rem = i_rem d in | ||||||
|  |     if rem < 2 then (t_need d 2; t_fill (t_decode_utf_16be_lo hi) d) else | ||||||
|  |     let j = d.i_pos in | ||||||
|  |     d.i_pos <- d.i_pos + 2; | ||||||
|  |     ret decode_utf_16be (r_utf_16_lo hi d.i j (j + 1)) 4 d | ||||||
|  |  | ||||||
|  | (* [decode_utf_16be] is the decoder entry point: `End at eoi, refill | ||||||
|  |    when the chunk is exhausted, otherwise read one 16-bit unit. *) | ||||||
|  | and decode_utf_16be d = | ||||||
|  |   let rem = i_rem d in | ||||||
|  |   if rem <= 0 then (if rem < 0 then `End else refill decode_utf_16be d) else | ||||||
|  |   if rem < 2 then (t_need d 2; t_fill t_decode_utf_16be d) else | ||||||
|  |   let j = d.i_pos in | ||||||
|  |   d.i_pos <- d.i_pos + 2; decode_utf_16be_lo (r_utf_16 d.i j (j + 1)) d | ||||||
|  |  | ||||||
|  | (* UTF-16LE decoder, same as UTF-16BE with byte swapped. *) | ||||||
|  |  | ||||||
|  | (* [t_decode_utf_16le_lo hi] finishes a surrogate pair whose low half | ||||||
|  |    was gathered in [d.t]; the reported byte count includes the two | ||||||
|  |    bytes of [hi]. Byte indices are swapped relative to the BE decoder. *) | ||||||
|  | let rec t_decode_utf_16le_lo hi d =                    (* decode from [d.t]. *) | ||||||
|  |   let bcount = d.t_len + 2 (* hi count *) in | ||||||
|  |   if d.t_len < d.t_need | ||||||
|  |   then ret decode_utf_16le (malformed_pair false hi d.t 0 d.t_len) bcount d | ||||||
|  |   else ret decode_utf_16le (r_utf_16_lo hi d.t 1 0) bcount d | ||||||
|  |  | ||||||
|  | (* [t_decode_utf_16le] decodes a 16-bit unit gathered in [d.t]. *) | ||||||
|  | and t_decode_utf_16le d =                              (* decode from [d.t]. *) | ||||||
|  |   if d.t_len < d.t_need | ||||||
|  |   then ret decode_utf_16le (malformed d.t 0 d.t_len) d.t_len d | ||||||
|  |   else decode_utf_16le_lo (r_utf_16 d.t 1 0) d | ||||||
|  |  | ||||||
|  | (* [decode_utf_16le_lo v] returns [v] directly unless it is a high | ||||||
|  |    surrogate, in which case the next two bytes are read as its low | ||||||
|  |    surrogate (through [d.t] if fewer than two bytes remain). *) | ||||||
|  | and decode_utf_16le_lo v d = match v with | ||||||
|  | | `Uchar _ | `Malformed _ as v -> ret decode_utf_16le v 2 d | ||||||
|  | | `Hi hi -> | ||||||
|  |     let rem = i_rem d in | ||||||
|  |     if rem < 2 then (t_need d 2; t_fill (t_decode_utf_16le_lo hi) d) else | ||||||
|  |     let j = d.i_pos in | ||||||
|  |     d.i_pos <- d.i_pos + 2; | ||||||
|  |     ret decode_utf_16le (r_utf_16_lo hi d.i (j + 1) j) 4 d | ||||||
|  |  | ||||||
|  | (* [decode_utf_16le] is the decoder entry point: `End at eoi, refill | ||||||
|  |    when the chunk is exhausted, otherwise read one 16-bit unit. *) | ||||||
|  | and decode_utf_16le d = | ||||||
|  |   let rem = i_rem d in | ||||||
|  |   if rem <= 0 then (if rem < 0 then `End else refill decode_utf_16le d) else | ||||||
|  |   if rem < 2 then (t_need d 2; t_fill t_decode_utf_16le d) else | ||||||
|  |   let j = d.i_pos in | ||||||
|  |   d.i_pos <- d.i_pos + 2; decode_utf_16le_lo (r_utf_16 d.i (j + 1) j) d | ||||||
|  |  | ||||||
|  | (* Encoding guessing. The guess is simple but starting the decoder | ||||||
|  |    after is tedious, uutf's decoders are not designed to put bytes | ||||||
|  |    back in the stream. *) | ||||||
|  |  | ||||||
|  | (* [guessed_utf_8 d] decodes the bytes already sitting in [d.t] (there | ||||||
|  |    are [d.t_len] of them, the guess reads at most three) as UTF-8. The | ||||||
|  |    local functions [b2] and [b3] are the continuations that handle the | ||||||
|  |    second and third of those bytes once the previous ones have been | ||||||
|  |    returned; when a byte starts a longer sequence it is moved to the | ||||||
|  |    front of [d.t] and the rest is fetched with [t_fill]. *) | ||||||
|  | let guessed_utf_8 d =                   (* start decoder after `UTF_8 guess. *) | ||||||
|  |   let b3 d =                                 (* handles the third read byte. *) | ||||||
|  |     let b3 = unsafe_byte d.t 2 in | ||||||
|  |     match utf_8_len.(b3) with | ||||||
|  |     | 0 -> ret decode_utf_8 (malformed d.t 2 1) 1 d | ||||||
|  |     | n -> | ||||||
|  |         d.t_need <- n; d.t_len <- 1; unsafe_set_byte d.t 0 b3; | ||||||
|  |         t_fill t_decode_utf_8 d | ||||||
|  |   in | ||||||
|  |   let b2 d =                                     (* handle second read byte. *) | ||||||
|  |     let b2 = unsafe_byte d.t 1 in | ||||||
|  |     (* from here on [b3] is the continuation to use after this byte. *) | ||||||
|  |     let b3 = if d.t_len > 2 then b3 else decode_utf_8 (* decodes `End *) in | ||||||
|  |     match utf_8_len.(b2) with | ||||||
|  |     | 0 -> ret b3 (malformed d.t 1 1) 1 d | ||||||
|  |     | 1 -> ret b3 (r_utf_8 d.t 1 1) 1 d | ||||||
|  |     | n ->                         (* copy d.t.(1-2) to d.t.(0-1) and decode *) | ||||||
|  |         d.t_need <- n; | ||||||
|  |         unsafe_set_byte d.t 0 b2; | ||||||
|  |         if (d.t_len < 3) then d.t_len <- 1 else | ||||||
|  |         (d.t_len <- 2; unsafe_set_byte d.t 1 (unsafe_byte d.t 2); ); | ||||||
|  |         t_fill t_decode_utf_8 d | ||||||
|  |   in | ||||||
|  |   let b1 = unsafe_byte d.t 0 in                   (* handle first read byte. *) | ||||||
|  |   (* from here on [b2] is the continuation to use after the first byte. *) | ||||||
|  |   let b2 = if d.t_len > 1 then b2 else decode_utf_8 (* decodes `End *) in | ||||||
|  |   match utf_8_len.(b1) with | ||||||
|  |   | 0 -> ret b2 (malformed d.t 0 1) 1 d | ||||||
|  |   | 1 -> ret b2 (r_utf_8 d.t 0 1) 1 d | ||||||
|  |   | 2 -> | ||||||
|  |       if d.t_len < 2 then ret decode_utf_8 (malformed d.t 0 1) 1 d else | ||||||
|  |       if d.t_len < 3 then ret decode_utf_8 (r_utf_8 d.t 0 2) 2 d else | ||||||
|  |       ret b3 (r_utf_8 d.t 0 2) 2 d | ||||||
|  |   | 3 -> | ||||||
|  |       if d.t_len < 3 | ||||||
|  |       then ret decode_utf_8 (malformed d.t 0 d.t_len) d.t_len d | ||||||
|  |       else ret decode_utf_8 (r_utf_8 d.t 0 3) 3 d | ||||||
|  |   | 4 -> | ||||||
|  |       (* three bytes are in [d.t] at best: the fourth must be fetched. *) | ||||||
|  |       if d.t_len < 3 | ||||||
|  |       then ret decode_utf_8 (malformed d.t 0 d.t_len) d.t_len d | ||||||
|  |       else (d.t_need <- 4; t_fill t_decode_utf_8 d) | ||||||
|  |   | n -> assert false | ||||||
|  |  | ||||||
|  | (* [guessed_utf_16 d be v] decodes the bytes already sitting in [d.t] as | ||||||
|  |    UTF-16, big endian if [be] is [true] and little endian otherwise. | ||||||
|  |    [v] is what the guess recognised (`BOM, `ASCII u or `Decode). The | ||||||
|  |    first two bytes form one 16-bit unit; a third byte, if present, is | ||||||
|  |    moved to the front of [d.t] by [b3] to start the next unit. *) | ||||||
|  | let guessed_utf_16 d be v =     (* start decoder after `UTF_16{BE,LE} guess. *) | ||||||
|  |   let decode_utf_16, t_decode_utf_16, t_decode_utf_16_lo, j0, j1 = | ||||||
|  |     if be then decode_utf_16be, t_decode_utf_16be, t_decode_utf_16be_lo, 0, 1 | ||||||
|  |     else decode_utf_16le, t_decode_utf_16le, t_decode_utf_16le_lo, 1, 0 | ||||||
|  |   in | ||||||
|  |   (* [b3 k] continues with [k] once the pending third byte is in place. *) | ||||||
|  |   let b3 k d = | ||||||
|  |     if d.t_len < 3 then decode_utf_16 d (* decodes `End *) else | ||||||
|  |     begin                             (* copy d.t.(2) to d.t.(0) and decode. *) | ||||||
|  |       d.t_need <- 2; d.t_len <- 1; | ||||||
|  |       unsafe_set_byte d.t 0 (unsafe_byte d.t 2); | ||||||
|  |       t_fill k d | ||||||
|  |     end | ||||||
|  |   in | ||||||
|  |   match v with | ||||||
|  |   | `BOM -> ret (b3 t_decode_utf_16) (`Uchar u_bom) 2 d | ||||||
|  |   | `ASCII u -> ret (b3 t_decode_utf_16) (`Uchar u) 2 d | ||||||
|  |   | `Decode -> | ||||||
|  |       match r_utf_16 d.t j0 j1 with | ||||||
|  |       | `Malformed _ | `Uchar _ as v -> ret (b3 t_decode_utf_16) v 2 d | ||||||
|  |       | `Hi hi -> | ||||||
|  |         (* a high surrogate with no byte after it is malformed. *) | ||||||
|  |         if d.t_len < 3 | ||||||
|  |         then ret decode_utf_16 (malformed_pair be hi "" 0 0) d.t_len d | ||||||
|  |         else (b3 (t_decode_utf_16_lo hi)) d | ||||||
|  |  | ||||||
|  | (* [guess_encoding d] asks for up to three bytes in [d.t], then [setup] | ||||||
|  |    runs [r_encoding] on them, records the guessed encoding and decoder | ||||||
|  |    continuation in [d], and starts decoding from the bytes already | ||||||
|  |    read. *) | ||||||
|  | let guess_encoding d =                  (* guess encoding and start decoder. *) | ||||||
|  |   let setup d = match r_encoding d.t 0 d.t_len with | ||||||
|  |   | `UTF_8 r -> | ||||||
|  |       d.encoding <- `UTF_8; d.k <- decode_utf_8; | ||||||
|  |       begin match r with | ||||||
|  |       | `BOM -> ret decode_utf_8 (`Uchar u_bom) 3 d | ||||||
|  |       | `Decode -> guessed_utf_8 d | ||||||
|  |       | `End -> `End | ||||||
|  |       end | ||||||
|  |   | `UTF_16BE r -> | ||||||
|  |       d.encoding <- `UTF_16BE; d.k <- decode_utf_16be; guessed_utf_16 d true r | ||||||
|  |   | `UTF_16LE r -> | ||||||
|  |       d.encoding <- `UTF_16LE; d.k <- decode_utf_16le; guessed_utf_16 d false r | ||||||
|  |  | ||||||
|  |   in | ||||||
|  |   (t_need d 3; t_fill setup d) | ||||||
|  |  | ||||||
|  | (* Character post-processors. Used for BOM handling, newline | ||||||
|  |    normalization and position tracking. The [pp_remove_bom] is only | ||||||
|  |    used for the first character to remove a possible initial BOM and | ||||||
|  |    handle UTF-16 endianness recognition. *) | ||||||
|  |  | ||||||
|  | (* Position bookkeeping helpers (inlined). *) | ||||||
|  |  | ||||||
|  | (* Start a new line: reset the column and bump the line number. *) | ||||||
|  | let nline d = | ||||||
|  |   d.col <- 0; | ||||||
|  |   d.line <- succ d.line | ||||||
|  |  | ||||||
|  | (* Advance the column number by one. *) | ||||||
|  | let ncol d = d.col <- succ d.col | ||||||
|  |  | ||||||
|  | (* Advance the character count by one. *) | ||||||
|  | let ncount d = d.count <- succ d.count | ||||||
|  |  | ||||||
|  | (* Record in [d.last_cr] whether the last character was CR. *) | ||||||
|  | let cr d was_cr = d.last_cr <- was_cr | ||||||
|  |  | ||||||
|  | (* [pp_remove_bom utf16 pp] is the post-processor for the first decoded | ||||||
|  |    value only: in every case it replaces itself with [pp] in [d.pp]. An | ||||||
|  |    initial BOM is dropped by decoding the next value with [d.k d]; when | ||||||
|  |    [utf16] is [true] the BOM also selects the UTF-16 byte order. Any | ||||||
|  |    other value is passed on to [pp]. *) | ||||||
|  | let pp_remove_bom utf16 pp d = function(* removes init. BOM, handles UTF-16. *) | ||||||
|  | | `Uchar 0xFEFF (* BOM *) -> | ||||||
|  |     if utf16 then (d.encoding <- `UTF_16BE; d.k <- decode_utf_16be); | ||||||
|  |     d.removed_bom <- true; d.pp <- pp; d.k d | ||||||
|  | | `Uchar 0xFFFE (* BOM reversed from decode_utf_16be *) when utf16 -> | ||||||
|  |     d.encoding <- `UTF_16LE; d.k <- decode_utf_16le; | ||||||
|  |     d.removed_bom <- true; d.pp <- pp; d.k d | ||||||
|  | | `Malformed _ | `Uchar _ as v -> | ||||||
|  |     d.removed_bom <- false; d.pp <- pp; d.pp d v | ||||||
|  |  | ||||||
|  | (* Post-processor without newline normalization: every value is returned | ||||||
|  |    unchanged and only the position counters are updated. An LF that | ||||||
|  |    directly follows a CR does not start another line. *) | ||||||
|  | let pp_nln_none d = function | ||||||
|  | | `Uchar 0x000A (* LF *) as v -> | ||||||
|  |     let last_cr = d.last_cr in | ||||||
|  |     cr d false; ncount d; if last_cr then v else (nline d; v) | ||||||
|  | | `Uchar 0x000D (* CR *) as v -> cr d true; ncount d; nline d; v | ||||||
|  | | `Uchar (0x0085 | 0x000C | 0x2028 | 0x2029) (* NEL | FF | LS | PS *) as v -> | ||||||
|  |     cr d false; ncount d; nline d; v | ||||||
|  | | `Uchar _ | `Malformed _ as v -> cr d false; ncount d; ncol d; v | ||||||
|  |  | ||||||
|  | (* Post-processor for `Readline normalization: CR, LF, NEL, FF, LS and | ||||||
|  |    PS are all replaced by [d.nl]. An LF that directly follows a CR is | ||||||
|  |    dropped by decoding the next value with [d.k d]. *) | ||||||
|  | let pp_nln_readline d = function | ||||||
|  | | `Uchar 0x000A (* LF *) -> | ||||||
|  |     let last_cr = d.last_cr in | ||||||
|  |     cr d false; if last_cr then d.k d else (ncount d; nline d; `Uchar d.nl) | ||||||
|  | | `Uchar 0x000D (* CR *) -> cr d true; ncount d; nline d; `Uchar d.nl | ||||||
|  | | `Uchar (0x0085 | 0x000C | 0x2028 | 0x2029) (* NEL | FF | LS | PS *) -> | ||||||
|  |     cr d false; ncount d; nline d; `Uchar d.nl | ||||||
|  | | `Uchar _ | `Malformed _ as v -> cr d false; ncount d; ncol d; v | ||||||
|  |  | ||||||
|  | (* Post-processor for `NLF normalization: CR, LF and NEL are replaced by | ||||||
|  |    [d.nl]; FF, LS and PS are returned unchanged but still start a new | ||||||
|  |    line. An LF that directly follows a CR is dropped by decoding the | ||||||
|  |    next value with [d.k d]. *) | ||||||
|  | let pp_nln_nlf d = function | ||||||
|  | | `Uchar 0x000A (* LF *) -> | ||||||
|  |     let last_cr = d.last_cr in | ||||||
|  |     cr d false; if last_cr then d.k d else (ncount d; nline d; `Uchar d.nl) | ||||||
|  | | `Uchar 0x000D (* CR *) -> cr d true; ncount d; nline d; `Uchar d.nl | ||||||
|  | | `Uchar 0x0085 (* NEL *) -> cr d false; ncount d; nline d; `Uchar d.nl | ||||||
|  | | `Uchar (0x000C | 0x2028 | 0x2029) as v (* FF | LS | PS *) -> | ||||||
|  |     cr d false; ncount d; nline d; v | ||||||
|  | | `Uchar _ | `Malformed _ as v -> cr d false; ncount d; ncol d; v | ||||||
|  |  | ||||||
|  | (* Post-processor for `ASCII normalization: CR and LF are replaced by | ||||||
|  |    [d.nl]; NEL, FF, LS and PS are returned unchanged but still start a | ||||||
|  |    new line. An LF that directly follows a CR is dropped by decoding the | ||||||
|  |    next value with [d.k d]. *) | ||||||
|  | let pp_nln_ascii d = function | ||||||
|  | | `Uchar 0x000A (* LF *) -> | ||||||
|  |     let last_cr = d.last_cr in | ||||||
|  |     cr d false; if last_cr then d.k d else (ncount d; nline d; `Uchar d.nl) | ||||||
|  | | `Uchar 0x000D (* CR *) -> cr d true; ncount d; nline d; `Uchar d.nl | ||||||
|  | | `Uchar (0x0085 | 0x000C | 0x2028 | 0x2029) as v (* NEL | FF | LS | PS *) -> | ||||||
|  |     cr d false; ncount d; nline d; v | ||||||
|  | | `Uchar _ | `Malformed _ as v -> cr d false; ncount d; ncol d; v | ||||||
|  |  | ||||||
|  | (* [decode_fun e] is the decoder function for the encoding tag [e]. | ||||||
|  |    `UTF_16 starts with the big endian decoder, see [pp_remove_bom]. *) | ||||||
|  | let decode_fun e = match e with | ||||||
|  | | `UTF_16 | `UTF_16BE -> decode_utf_16be | ||||||
|  | | `UTF_16LE -> decode_utf_16le | ||||||
|  | | `UTF_8 -> decode_utf_8 | ||||||
|  | | `ISO_8859_1 -> decode_iso_8859_1 | ||||||
|  | | `US_ASCII -> decode_us_ascii | ||||||
|  |  | ||||||
|  | (* [decoder ?nln ?encoding src] builds a decoder record for [src]. | ||||||
|  |    [nln] selects the newline post-processor and its replacement | ||||||
|  |    character; without [encoding] the decoder starts with | ||||||
|  |    [guess_encoding]. The initial post-processor is [pp_remove_bom], | ||||||
|  |    with UTF-16 handling enabled only for the `UTF_16 encoding. *) | ||||||
|  | let decoder ?nln ?encoding src = | ||||||
|  |   let pp, nl = match nln with | ||||||
|  |   | None -> pp_nln_none, 0x000A (* not used. *) | ||||||
|  |   | Some (`ASCII nl) -> pp_nln_ascii, nl | ||||||
|  |   | Some (`NLF nl) -> pp_nln_nlf, nl | ||||||
|  |   | Some (`Readline nl) -> pp_nln_readline, nl | ||||||
|  |   in | ||||||
|  |   let encoding, k = match encoding with | ||||||
|  |   | None -> `UTF_8, guess_encoding | ||||||
|  |   | Some e -> (e :> decoder_encoding), decode_fun e | ||||||
|  |   in | ||||||
|  |   let i, i_pos, i_max = match src with | ||||||
|  |   | `Manual -> "", 1, 0                              (* implies i_rem d = 0. *) | ||||||
|  |   | `Channel _ -> String.create io_buffer_size, 1, 0                (* idem. *) | ||||||
|  |   | `String s -> s, 0, String.length s - 1 | ||||||
|  |   in | ||||||
|  |   { src = (src :> src); encoding; nln = (nln :> nln option); nl; | ||||||
|  |     i; i_pos; i_max; t = String.create 4; t_len = 0; t_need = 0; | ||||||
|  |     removed_bom = false; last_cr = false; line = 1; col = 0; | ||||||
|  |     byte_count = 0; count = 0; | ||||||
|  |     pp = pp_remove_bom (encoding = `UTF_16) pp; k } | ||||||
|  |  | ||||||
|  | (* [decode d] runs the decoder's current continuation. *) | ||||||
|  | let decode d = d.k d | ||||||
|  | (* Accessors for the decoder's position counters and configuration. *) | ||||||
|  | let decoder_line d = d.line | ||||||
|  | let decoder_col d = d.col | ||||||
|  | let decoder_byte_count d = d.byte_count | ||||||
|  | let decoder_count d = d.count | ||||||
|  | let decoder_removed_bom d = d.removed_bom | ||||||
|  | let decoder_src d = d.src | ||||||
|  | let decoder_nln d = d.nln | ||||||
|  | let decoder_encoding d = d.encoding | ||||||
|  | (* [set_decoder_encoding d e] records [e] as the decoded encoding and | ||||||
|  |    replaces the continuation with the decoder function for [e]. *) | ||||||
|  | let set_decoder_encoding d e = | ||||||
|  |   d.encoding <- (e :> decoder_encoding); d.k <- decode_fun e | ||||||
|  |  | ||||||
|  | (* Encode *) | ||||||
|  |  | ||||||
|  | (* [dst] is the kind of output destination and [encode] the value given | ||||||
|  |    to an encoder at each step. *) | ||||||
|  | type dst = [ `Channel of out_channel | `Buffer of Buffer.t | `Manual ] | ||||||
|  | type encode = [ `Await | `End | `Uchar of uchar ] | ||||||
|  | (* Encoder state: the destination and encoding are fixed, the output | ||||||
|  |    chunk, the temporary buffer positions and the continuation [k] are | ||||||
|  |    mutable. *) | ||||||
|  | type encoder = | ||||||
|  |   { dst : dst;                                        (* output destination. *) | ||||||
|  |     encoding : encoding;                                (* encoded encoding. *) | ||||||
|  |     mutable o : string;                             (* current output chunk. *) | ||||||
|  |     mutable o_pos : int;                   (* next output position to write. *) | ||||||
|  |     mutable o_max : int;                (* maximal output position to write. *) | ||||||
|  |     t : string;                 (* four bytes buffer for overlapping writes. *) | ||||||
|  |     mutable t_pos : int;                    (* next position to read in [t]. *) | ||||||
|  |     mutable t_max : int;                 (* maximal position to read in [t]. *) | ||||||
|  |     mutable k :                                     (* encoder continuation. *) | ||||||
|  |       encoder -> encode -> [ `Ok | `Partial ] } | ||||||
|  |  | ||||||
|  | (* On encodes that overlap two (or more) [e.o] buffers, we encode the | ||||||
|  |    character to the temporary buffer [e.t] and continue with | ||||||
|  |    [t_flush] to write this data on the different [e.o] buffers. If | ||||||
|  |    the [e.o] buffers are not too small this is faster than | ||||||
|  |    continuation based byte per byte writes. *) | ||||||
|  |  | ||||||
|  | (* Remaining bytes to write in [e.o]. *) | ||||||
|  | let o_rem e = 1 + e.o_max - e.o_pos | ||||||
|  |  | ||||||
|  | (* Set [e.o] with the [l] bytes of [s] starting at [j]. Invalid bounds | ||||||
|  |    raise via [invalid_bounds]. *) | ||||||
|  | let dst e s j l = | ||||||
|  |   let out_of_bounds = j < 0 || l < 0 || j + l > String.length s in | ||||||
|  |   if out_of_bounds then invalid_bounds j l; | ||||||
|  |   e.o <- s; | ||||||
|  |   e.o_pos <- j; | ||||||
|  |   e.o_max <- j + l - 1 | ||||||
|  |  | ||||||
|  | (* [partial k] is the continuation installed while output is pending: | ||||||
|  |    `Await resumes with [k e], any other value raises via | ||||||
|  |    [invalid_encode]. *) | ||||||
|  | let partial k e = function `Await -> k e | `Uchar _ | `End -> invalid_encode () | ||||||
|  | let flush k e = match e.dst with(* get free storage in [e.o] and [k]ontinue. *) | ||||||
|  | | `Manual -> e.k <- partial k; `Partial | ||||||
|  | (* buffer and channel destinations: write out the first [e.o_pos] bytes | ||||||
|  |    of [e.o] and start again from position 0. *) | ||||||
|  | | `Buffer b -> Buffer.add_substring b e.o 0 e.o_pos; e.o_pos <- 0; k e | ||||||
|  | | `Channel oc -> output oc e.o 0 e.o_pos; e.o_pos <- 0; k e | ||||||
|  |  | ||||||
|  | (* [t_range e max] marks positions 0 to [max] of [e.t] as pending. *) | ||||||
|  | let t_range e max = e.t_pos <- 0; e.t_max <- max | ||||||
|  | let rec t_flush k e =               (* flush [e.t] up to [e.t_max] in [e.o]. *) | ||||||
|  |   (* [blit e l] copies [l] pending bytes from [e.t] to [e.o] and advances | ||||||
|  |      both positions. *) | ||||||
|  |   let blit e l = | ||||||
|  |     unsafe_blit e.t e.t_pos e.o e.o_pos l; | ||||||
|  |     e.o_pos <- e.o_pos + l; e.t_pos <- e.t_pos + l | ||||||
|  |   in | ||||||
|  |   let rem = o_rem e in | ||||||
|  |   let len = e.t_max - e.t_pos + 1 in | ||||||
|  |   (* not enough room: write what fits, flush and come back here. *) | ||||||
|  |   if rem < len then (blit e rem; flush (t_flush k) e) else (blit e len; k e) | ||||||
|  |  | ||||||
|  | (* Encoders. *) | ||||||
|  |  | ||||||
|  | let rec encode_utf_8 e v =                   (* encode [v] as UTF-8 in [e.o]. *) | ||||||
|  |   let k e = e.k <- encode_utf_8; `Ok in     (* done, reinstall this encoder. *) | ||||||
|  |   match v with | ||||||
|  |   | `Await -> k e                                      (* nothing to write. *) | ||||||
|  |   | `End -> flush k e                              (* flush [e.o], then [k]. *) | ||||||
|  |   | `Uchar u as v -> | ||||||
|  |       let rem = o_rem e in | ||||||
|  |       if u <= 0x007F then       (* 1 byte, flush and retry if [e.o] is full. *) | ||||||
|  |       if rem < 1 then flush (fun e -> encode_utf_8 e v) e else | ||||||
|  |       (unsafe_set_byte e.o e.o_pos u; e.o_pos <- e.o_pos + 1; k e) | ||||||
|  |       else if u <= 0x07FF then                                   (* 2 bytes. *) | ||||||
|  |       begin | ||||||
|  |         let s, j, k =    (* write in [e.t] if [e.o] lacks room, else in [e.o]. *) | ||||||
|  |           if rem < 2 then (t_range e 1; e.t, 0, t_flush k) else | ||||||
|  |           let j = e.o_pos in (e.o_pos <- e.o_pos + 2; e.o, j, k) | ||||||
|  |         in | ||||||
|  |         unsafe_set_byte s j (0xC0 lor (u lsr 6)); | ||||||
|  |         unsafe_set_byte s (j + 1) (0x80 lor (u land 0x3F)); | ||||||
|  |         k e | ||||||
|  |       end | ||||||
|  |       else if u <= 0xFFFF then                                   (* 3 bytes. *) | ||||||
|  |       begin | ||||||
|  |         let s, j, k =                         (* same scheme as for 2 bytes. *) | ||||||
|  |           if rem < 3 then (t_range e 2; e.t, 0, t_flush k) else | ||||||
|  |           let j = e.o_pos in (e.o_pos <- e.o_pos + 3; e.o, j, k) | ||||||
|  |         in | ||||||
|  |         unsafe_set_byte s j (0xE0 lor (u lsr 12)); | ||||||
|  |         unsafe_set_byte s (j + 1) (0x80 lor ((u lsr 6) land 0x3F)); | ||||||
|  |         unsafe_set_byte s (j + 2) (0x80 lor (u land 0x3F)); | ||||||
|  |         k e | ||||||
|  |       end | ||||||
|  |       else                                                       (* 4 bytes. *) | ||||||
|  |       begin | ||||||
|  |         let s, j, k =                         (* same scheme as for 2 bytes. *) | ||||||
|  |           if rem < 4 then (t_range e 3; e.t, 0, t_flush k) else | ||||||
|  |           let j = e.o_pos in (e.o_pos <- e.o_pos + 4; e.o, j, k) | ||||||
|  |         in | ||||||
|  |         unsafe_set_byte s j (0xF0 lor (u lsr 18)); | ||||||
|  |         unsafe_set_byte s (j + 1) (0x80 lor ((u lsr 12) land 0x3F)); | ||||||
|  |         unsafe_set_byte s (j + 2) (0x80 lor ((u lsr 6) land 0x3F)); | ||||||
|  |         unsafe_set_byte s (j + 3) (0x80 lor (u land 0x3F)); | ||||||
|  |         k e | ||||||
|  |       end | ||||||
|  |  | ||||||
|  | let rec encode_utf_16be e v =            (* encode [v] as UTF-16BE in [e.o]. *) | ||||||
|  |   let k e = e.k <- encode_utf_16be; `Ok in  (* done, reinstall this encoder. *) | ||||||
|  |   match v with | ||||||
|  |   | `Await -> k e                                      (* nothing to write. *) | ||||||
|  |   | `End -> flush k e                              (* flush [e.o], then [k]. *) | ||||||
|  |   | `Uchar u -> | ||||||
|  |       let rem = o_rem e in | ||||||
|  |       if u < 0x10000 then                (* 2 bytes, most significant first. *) | ||||||
|  |       begin | ||||||
|  |         let s, j, k =    (* write in [e.t] if [e.o] lacks room, else in [e.o]. *) | ||||||
|  |           if rem < 2 then (t_range e 1; e.t, 0, t_flush k) else | ||||||
|  |           let j = e.o_pos in (e.o_pos <- e.o_pos + 2; e.o, j, k) | ||||||
|  |         in | ||||||
|  |         unsafe_set_byte s j (u lsr 8); | ||||||
|  |         unsafe_set_byte s (j + 1) (u land 0xFF); | ||||||
|  |         k e | ||||||
|  |       end else begin                   (* 4 bytes, surrogate pair [hi] [lo]. *) | ||||||
|  |         let s, j, k = | ||||||
|  |           if rem < 4 then (t_range e 3; e.t, 0, t_flush k) else | ||||||
|  |           let j = e.o_pos in (e.o_pos <- e.o_pos + 4; e.o, j, k) | ||||||
|  |         in | ||||||
|  |         let u' = u - 0x10000 in | ||||||
|  |         let hi = (0xD800 lor (u' lsr 10)) in        (* upper 10 bits of [u']. *) | ||||||
|  |         let lo = (0xDC00 lor (u' land 0x3FF)) in    (* lower 10 bits of [u']. *) | ||||||
|  |         unsafe_set_byte s j (hi lsr 8); | ||||||
|  |         unsafe_set_byte s (j + 1) (hi land 0xFF); | ||||||
|  |         unsafe_set_byte s (j + 2) (lo lsr 8); | ||||||
|  |         unsafe_set_byte s (j + 3) (lo land 0xFF); | ||||||
|  |         k e | ||||||
|  |       end | ||||||
|  |  | ||||||
|  | let rec encode_utf_16le e v =         (* encode_utf_16be with bytes swapped. *) | ||||||
|  |   let k e = e.k <- encode_utf_16le; `Ok in  (* done, reinstall this encoder. *) | ||||||
|  |   match v with | ||||||
|  |   | `Await -> k e                                      (* nothing to write. *) | ||||||
|  |   | `End -> flush k e                              (* flush [e.o], then [k]. *) | ||||||
|  |   | `Uchar u -> | ||||||
|  |     let rem = o_rem e in | ||||||
|  |     if u < 0x10000 then                 (* 2 bytes, least significant first. *) | ||||||
|  |     begin | ||||||
|  |       let s, j, k =      (* write in [e.t] if [e.o] lacks room, else in [e.o]. *) | ||||||
|  |         if rem < 2 then (t_range e 1; e.t, 0, t_flush k) else | ||||||
|  |         let j = e.o_pos in (e.o_pos <- e.o_pos + 2; e.o, j, k) | ||||||
|  |       in | ||||||
|  |       unsafe_set_byte s j (u land 0xFF); | ||||||
|  |       unsafe_set_byte s (j + 1) (u lsr 8); | ||||||
|  |       k e | ||||||
|  |     end | ||||||
|  |     else                               (* 4 bytes, surrogate pair [hi] [lo]. *) | ||||||
|  |     begin | ||||||
|  |       let s, j, k = | ||||||
|  |         if rem < 4 then (t_range e 3; e.t, 0, t_flush k) else | ||||||
|  |         let j = e.o_pos in (e.o_pos <- e.o_pos + 4; e.o, j, k) | ||||||
|  |       in | ||||||
|  |       let u' = u - 0x10000 in | ||||||
|  |       let hi = (0xD800 lor (u' lsr 10)) in          (* upper 10 bits of [u']. *) | ||||||
|  |       let lo = (0xDC00 lor (u' land 0x3FF)) in      (* lower 10 bits of [u']. *) | ||||||
|  |       unsafe_set_byte s j (hi land 0xFF); | ||||||
|  |       unsafe_set_byte s (j + 1) (hi lsr 8); | ||||||
|  |       unsafe_set_byte s (j + 2) (lo land 0xFF); | ||||||
|  |       unsafe_set_byte s (j + 3) (lo lsr 8); | ||||||
|  |       k e | ||||||
|  |     end | ||||||
|  |  | ||||||
|  | (* Maps an encoding to its encoding function; [`UTF_16] is encoded like | ||||||
|  |    [`UTF_16BE]. *) | ||||||
|  | let encode_fun encoding = match encoding with | ||||||
|  | | `UTF_8 -> encode_utf_8 | ||||||
|  | | `UTF_16 | `UTF_16BE -> encode_utf_16be | ||||||
|  | | `UTF_16LE -> encode_utf_16le | ||||||
|  |  | ||||||
|  | (* [encoder encoding dst] is a fresh encoder for [encoding] that writes on | ||||||
|  |    [dst]. [`Manual] destinations start with an empty output chunk (so that | ||||||
|  |    [o_rem e = 0]); [`Buffer] and [`Channel] destinations get an internal | ||||||
|  |    chunk of [io_buffer_size] bytes. The temporary buffer [t] starts with | ||||||
|  |    nothing to read and [k] is the encoding function of [encoding]. *) | ||||||
|  | let encoder encoding dst = | ||||||
|  |   let o, o_pos, o_max = match dst with | ||||||
|  |   | `Buffer _ | `Channel _ -> | ||||||
|  |       String.create io_buffer_size, 0, io_buffer_size - 1 | ||||||
|  |   | `Manual -> "", 1, 0 | ||||||
|  |   in | ||||||
|  |   { dst = (dst :> dst); | ||||||
|  |     encoding = (encoding :> encoding); | ||||||
|  |     o; o_pos; o_max; | ||||||
|  |     t = String.create 4; t_pos = 1; t_max = 0; | ||||||
|  |     k = encode_fun encoding } | ||||||
|  |  | ||||||
|  | (* [encode e v] hands [v] to the current continuation [e.k] of [e]. *) | ||||||
|  | let encode e v = | ||||||
|  |   let continue = e.k in | ||||||
|  |   continue e (v :> encode) | ||||||
|  | (* The [encoding] field of the encoder. *) | ||||||
|  | let encoder_encoding { encoding; _ } = encoding | ||||||
|  | (* The [dst] field of the encoder. *) | ||||||
|  | let encoder_dst { dst; _ } = dst | ||||||
|  |  | ||||||
|  | (* Manual sources and destinations. *) | ||||||
|  |  | ||||||
|  | module Manual = struct | ||||||
|  |   let src = src                         (* re-exports the toplevel [src]. *) | ||||||
|  |   let dst = dst                 (* re-exports [dst]: set [e.o] with [s]. *) | ||||||
|  |   let dst_rem = o_rem     (* [o_rem]: remaining bytes to write in [e.o]. *) | ||||||
|  | end | ||||||
|  |  | ||||||
|  | (* Strings folders and Buffer encoders *) | ||||||
|  |  | ||||||
|  | module String = struct | ||||||
|  |   (* [encoding_guess s] is the encoding [r_encoding] guesses from the start | ||||||
|  |      of [s], paired with [true] iff the guess is reported as [`BOM]. | ||||||
|  |      [r_encoding] is given the actual length of [s]; passing | ||||||
|  |      [max (String.length s) 3] would announce 3 bytes for strings of | ||||||
|  |      length 0, 1 or 2. NOTE(review): assumes the last argument of | ||||||
|  |      [r_encoding] is a byte count, like [l] in [dst] -- to be confirmed. *) | ||||||
|  |   let encoding_guess s = match r_encoding s 0 (String.length s) with | ||||||
|  |   | `UTF_8 d -> `UTF_8, (d = `BOM) | ||||||
|  |   | `UTF_16BE d -> `UTF_16BE, (d = `BOM) | ||||||
|  |   | `UTF_16LE d -> `UTF_16LE, (d = `BOM) | ||||||
|  |  | ||||||
|  |   (* A folder gets the accumulator, the byte index at which a decode | ||||||
|  |      starts in the string and the result of that decode. *) | ||||||
|  |   type 'a folder = | ||||||
|  |     'a -> int -> [ `Uchar of uchar | `Malformed of string ] -> 'a | ||||||
|  |  | ||||||
|  |   (* [fold_utf_8 f acc s] applies [f] to each decode of [s], from index 0. | ||||||
|  |      The number of bytes needed is looked up in [utf_8_len] with the byte | ||||||
|  |      at the current index: | ||||||
|  |      - [0] needed: that single byte is reported with [malformed] and the | ||||||
|  |        fold goes on at the next byte. | ||||||
|  |      - fewer bytes left than needed: the remaining bytes are reported with | ||||||
|  |        [malformed] and the fold stops. | ||||||
|  |      - otherwise the result of [r_utf_8] is reported and the fold goes on | ||||||
|  |        after the bytes consumed. *) | ||||||
|  |   let fold_utf_8 f acc s = | ||||||
|  |     let rec loop acc f s i l = | ||||||
|  |       if i = l then acc else | ||||||
|  |       let need = unsafe_array_get utf_8_len (unsafe_byte s i) in | ||||||
|  |       if need = 0 then loop (f acc i (malformed s i 1)) f s (i + 1) l else | ||||||
|  |       let rem = l - i in | ||||||
|  |       if rem < need then f acc i (malformed s i rem) else | ||||||
|  |       loop (f acc i (r_utf_8 s i need)) f s (i + need) l | ||||||
|  |     in | ||||||
|  |     loop acc f s 0 (String.length s) | ||||||
|  |  | ||||||
|  |   (* [fold_utf_16be f acc s] applies [f] to each decode of [s], from index | ||||||
|  |      0, reading 2 bytes at a time with [r_utf_16] (byte at [i] first). | ||||||
|  |      A single trailing byte is reported with [malformed] and the fold | ||||||
|  |      stops. On [`Hi hi] two more bytes are read with [r_utf_16_lo]; if | ||||||
|  |      fewer than 4 bytes are left, they are reported with [malformed] and | ||||||
|  |      the fold stops. *) | ||||||
|  |   let fold_utf_16be f acc s = | ||||||
|  |     let rec loop acc f s i l = | ||||||
|  |       if i = l then acc else | ||||||
|  |       let rem = l - i in | ||||||
|  |       if rem < 2 then f acc i (malformed s i 1) else | ||||||
|  |       match r_utf_16 s i (i + 1) with | ||||||
|  |       | `Uchar _ | `Malformed _ as v -> loop (f acc i v) f s (i + 2) l | ||||||
|  |       | `Hi hi -> | ||||||
|  |           if rem < 4 then f acc i (malformed s i rem)  else | ||||||
|  |           loop (f acc i (r_utf_16_lo hi s (i + 2) (i + 3))) f s (i + 4) l | ||||||
|  |     in | ||||||
|  |     loop acc f s 0 (String.length s) | ||||||
|  |  | ||||||
|  |   (* Same as [fold_utf_16be] except that the two byte indices given to | ||||||
|  |      [r_utf_16] and [r_utf_16_lo] are swapped. *) | ||||||
|  |   let fold_utf_16le f acc s =             (* [fold_utf_16be], bytes swapped. *) | ||||||
|  |     let rec loop acc f s i l = | ||||||
|  |       if i = l then acc else | ||||||
|  |       let rem = l - i in | ||||||
|  |       if rem < 2 then f acc i (malformed s i 1) else | ||||||
|  |       match r_utf_16 s (i + 1) i with | ||||||
|  |       | `Uchar _ | `Malformed _ as v -> loop (f acc i v) f s (i + 2) l | ||||||
|  |       | `Hi hi -> | ||||||
|  |           if rem < 4 then f acc i (malformed s i rem)  else | ||||||
|  |           loop (f acc i (r_utf_16_lo hi s (i + 3) (i + 2))) f s (i + 4) l | ||||||
|  |     in | ||||||
|  |     loop acc f s 0 (String.length s) | ||||||
|  | end | ||||||
|  |  | ||||||
|  | module Buffer = struct | ||||||
|  |   (* [add_utf_8 b u] adds the UTF-8 encoding of [u] to [b]: 1 byte up to | ||||||
|  |      [0x007F], 2 up to [0x07FF], 3 up to [0xFFFF] and 4 above. *) | ||||||
|  |   let add_utf_8 b u = | ||||||
|  |     let put byte = Buffer.add_char b (unsafe_chr byte) in | ||||||
|  |     (* Byte [0x80 lor] the 6 bits of [u] that start at bit [shift]. *) | ||||||
|  |     let cont shift = put (0x80 lor ((u lsr shift) land 0x3F)) in | ||||||
|  |     if u <= 0x007F then put u | ||||||
|  |     else if u <= 0x07FF then begin | ||||||
|  |       put (0xC0 lor (u lsr 6)); | ||||||
|  |       cont 0 | ||||||
|  |     end else if u <= 0xFFFF then begin | ||||||
|  |       put (0xE0 lor (u lsr 12)); | ||||||
|  |       cont 6; | ||||||
|  |       cont 0 | ||||||
|  |     end else begin | ||||||
|  |       put (0xF0 lor (u lsr 18)); | ||||||
|  |       cont 12; | ||||||
|  |       cont 6; | ||||||
|  |       cont 0 | ||||||
|  |     end | ||||||
|  |  | ||||||
|  |   (* [add_utf_16be b u] adds the UTF-16BE encoding of [u] to [b]: [u] itself | ||||||
|  |      as two bytes below [0x10000], otherwise the pair | ||||||
|  |      [0xD800 lor] upper 10 bits, [0xDC00 lor] lower 10 bits of | ||||||
|  |      [u - 0x10000]. Each 16-bit value is added most significant byte | ||||||
|  |      first. *) | ||||||
|  |   let add_utf_16be b u = | ||||||
|  |     let put byte = Buffer.add_char b (unsafe_chr byte) in | ||||||
|  |     let unit16 x = put (x lsr 8); put (x land 0xFF) in | ||||||
|  |     if u < 0x10000 then unit16 u else begin | ||||||
|  |       let off = u - 0x10000 in | ||||||
|  |       unit16 (0xD800 lor (off lsr 10)); | ||||||
|  |       unit16 (0xDC00 lor (off land 0x3FF)) | ||||||
|  |     end | ||||||
|  |  | ||||||
|  |   (* Same as [add_utf_16be] except that each 16-bit value is added least | ||||||
|  |      significant byte first. *) | ||||||
|  |   let add_utf_16le b u = | ||||||
|  |     let put byte = Buffer.add_char b (unsafe_chr byte) in | ||||||
|  |     let unit16 x = put (x land 0xFF); put (x lsr 8) in | ||||||
|  |     if u < 0x10000 then unit16 u else begin | ||||||
|  |       let off = u - 0x10000 in | ||||||
|  |       unit16 (0xD800 lor (off lsr 10)); | ||||||
|  |       unit16 (0xDC00 lor (off land 0x3FF)) | ||||||
|  |     end | ||||||
|  | end | ||||||
|  |  | ||||||
|  | (*--------------------------------------------------------------------------- | ||||||
|  |    Copyright 2012 Daniel C. Bünzli | ||||||
|  |    All rights reserved. | ||||||
|  |  | ||||||
|  |    Redistribution and use in source and binary forms, with or without | ||||||
|  |    modification, are permitted provided that the following conditions | ||||||
|  |    are met: | ||||||
|  |  | ||||||
|  |    1. Redistributions of source code must retain the above copyright | ||||||
|  |       notice, this list of conditions and the following disclaimer. | ||||||
|  |  | ||||||
|  |    2. Redistributions in binary form must reproduce the above | ||||||
|  |       copyright notice, this list of conditions and the following | ||||||
|  |       disclaimer in the documentation and/or other materials provided | ||||||
|  |       with the distribution. | ||||||
|  |  | ||||||
|  |    3. Neither the name of Daniel C. Bünzli nor the names of | ||||||
|  |       contributors may be used to endorse or promote products derived | ||||||
|  |       from this software without specific prior written permission. | ||||||
|  |  | ||||||
|  |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||||
|  |    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||||
|  |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||||
|  |    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||||
|  |    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||||
|  |    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||||
|  |    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||||
|  |    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||||
|  |    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||||
|  |    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
|  |    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|  |   ---------------------------------------------------------------------------*) | ||||||
		Reference in New Issue
	
	Block a user