Commits (40)
build-and-test:
---
.build_template:
image: ocaml/opam2:alpine-3.10-ocaml-4.08
script:
before_script:
- sudo apk add --update gmp-dev m4 perl
- opam repository set-url default https://opam.ocaml.org
- opam update
- opam pin --no-action data-encoding .
- opam depext ocamlformat.0.11.0
- opam install ocamlformat.0.11.0
- dune build @fmt
- opam depext data-encoding
- opam install --deps-only data-encoding
cache:
key: "$CI_COMMIT_REF_SLUG"
paths:
- _build
build:
extends: .build_template
stage: build
script:
- dune build
test:
extends: .build_template
stage: test
script:
- opam depext ocamlformat.0.11.0
- opam install ocamlformat.0.11.0
- dune build @fmt
- opam depext --dry-run crowbar alcotest
- opam install --deps-only --with-test .
- dune runtest
pages:
extends: .build_template
stage: deploy
artifacts:
paths:
- public/
rules:
- if: '$CI_COMMIT_BRANCH == "master"'
script:
- opam install odoc
- dune build @doc
- mv _build/default/_doc/_html public/
......@@ -8,3 +8,12 @@ v0.2:
- CI tests
- error management improvements (use result, allow exn and option)
- do not print 0-sized fields in binary descriptions
v0.3:
- Adapt to json-data-encoding.0.9.1 and provide json-lexeme seq to string seq
- Improved performance
- `maximum_length` to determine static size bounds (when possible)
- provide `to_`/`of_string` alongside `to_`/`of_bytes`
- Improved documentation
- Increase test coverage
- Fix JSON encoding of Result
Open Source License
Copyright (c) 2018 Dynamic Ledger Solutions, Inc. <contact@tezos.com>
Copyright (c) 2018 Nomadic Labs, <contact@nomadic-labs.com>
Copyright (c) 2018-2021 Nomadic Labs, <contact@nomadic-labs.com>
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
......
......@@ -4,21 +4,23 @@ Data-encoding
A library for encoding and decoding data. It offers a great degree of control over the layout of data. It supports json and binary serialisation/deserialisation.
Use
---
Usage
-----
For a type `t`, you can use the library's combinators to build a encoding `t
For a type `t`, you can use the library's combinators to build an encoding `t
encoding`. You can then use the various reading/writing functions with this
encoding to serialise and deserialise values of the type `t`.
Example:
```
open Data_encoding
type t = (string * int) list
let encoding = list (tup2 string int31)
let v = [("foo", 32); ("bar", 0)]
let j = Json.construct encoding v
let w = Json.destruct j
let w = Json.destruct encoding j
let () = assert (v = w)
```
......
......@@ -6,14 +6,14 @@ bug-reports: "https://gitlab.com/nomadic-labs/data-encoding/issues"
dev-repo: "git+https://gitlab.com/nomadic-labs/data-encoding.git"
license: "MIT"
depends: [
"ocaml" { >= "4.07" }
"ocaml" { >= "4.08" }
"dune" { >= "1.7" }
"ezjsonm"
"zarith"
"json-data-encoding" { = "0.8" }
"json-data-encoding-bson" { = "0.8" }
"json-data-encoding" { = "0.9.1" }
"json-data-encoding-bson" { = "0.9.1" }
"alcotest" { with-test }
"crowbar" { with-test }
"crowbar" { >= "0.2" & with-test }
]
build: [
["dune" "build" "-p" name "-j" jobs]
......
(lang dune 1.7)
(lang dune 1.11)
(name data-encoding)
(using fmt 1.2)
(using fmt 1.1)
......@@ -41,7 +41,7 @@ module UF : sig
val find : t -> string -> Binary_schema.description
val union :
t -> new_cannonical:Binary_schema.description -> existing:string -> unit
t -> new_canonical:Binary_schema.description -> existing:string -> unit
val empty : unit -> t
end = struct
......@@ -56,11 +56,11 @@ end = struct
let rec find tbl key =
match Hashtbl.find tbl key with Ref s -> find tbl s | Root desc -> desc
let union tbl ~new_cannonical ~existing =
add tbl new_cannonical ;
let union tbl ~new_canonical ~existing =
add tbl new_canonical ;
let root = find tbl existing in
if root.title = new_cannonical.title then ()
else Hashtbl.replace tbl root.title (Ref new_cannonical.title)
if root.title = new_canonical.title then ()
else Hashtbl.replace tbl root.title (Ref new_canonical.title)
let empty () = Hashtbl.create 128
end
......@@ -170,7 +170,7 @@ let dedup_canonicalize uf =
Hashtbl.add tbl layout desc ;
help prev_len ((desc.title, layout) :: acc) tl
| Some original_desc ->
UF.union uf ~new_cannonical:original_desc ~existing:name ;
UF.union uf ~new_canonical:original_desc ~existing:name ;
help prev_len acc tl )
in
help 0 []
......@@ -268,12 +268,9 @@ let describe (type x) (encoding : x Encoding.t) =
let cases =
List.sort (fun (t1, _) (t2, _) -> (compare : int -> int -> int) t1 t2)
@@ List.fold_left
(fun acc case ->
match case with
| Case {tag = Json_only; _} ->
acc
| Case {tag = Tag tag; _} ->
(tag, case) :: acc)
(fun acc (Case {tag; _} as case) ->
if Uint_option.is_some tag then (Uint_option.get tag, case) :: acc
else acc)
[]
cases
in
......@@ -308,15 +305,15 @@ let describe (type x) (encoding : x Encoding.t) =
b desc ->
string * references =
fun ?description ~title name recursives references encoding ->
let new_cannonical = {Binary_schema.title; description} in
UF.add uf new_cannonical ;
let new_canonical = {Binary_schema.title; description} in
UF.add uf new_canonical ;
let (layout, references) = layout None recursives references encoding in
match layout with
| Ref ref_name ->
UF.union uf ~existing:ref_name ~new_cannonical ;
UF.union uf ~existing:ref_name ~new_canonical ;
(ref_name, references)
| layout ->
UF.add uf new_cannonical ;
UF.add uf new_canonical ;
( name,
add_reference
name
......@@ -407,7 +404,7 @@ let describe (type x) (encoding : x Encoding.t) =
fields None recursives references right.encoding
in
(fields1 @ fields2, references)
| Union {kind; tag_size; cases} ->
| Union {kind; tag_size; cases; _} ->
let (name, references) =
union None recursives references kind tag_size cases
in
......@@ -589,7 +586,7 @@ let describe (type x) (encoding : x Encoding.t) =
let (fields, references) = fields None recursives references descr in
let references = add_reference name (obj fields) references in
(Ref name, references)
| Union {kind; tag_size; cases} ->
| Union {kind; tag_size; cases; _} ->
let (name, references) =
union ref_name recursives references kind tag_size cases
in
......@@ -637,7 +634,7 @@ let describe (type x) (encoding : x Encoding.t) =
(fun (name, encoding) ->
match encoding with
| Binary_schema.Obj {fields = [Anonymous_field (_, Ref reference)]} ->
UF.union uf ~new_cannonical:(UF.find uf name) ~existing:reference ;
UF.union uf ~new_canonical:(UF.find uf name) ~existing:reference ;
false
| _ ->
true)
......
......@@ -23,20 +23,9 @@
(* *)
(*****************************************************************************)
type read_error =
| Not_enough_data
| Extra_bytes
| No_case_matched
| Unexpected_tag of int
| Invalid_size of int
| Invalid_int of {min : int; v : int; max : int}
| Invalid_float of {min : float; v : float; max : float}
| Trailing_zero
| Size_limit_exceeded
| List_too_long
| Array_too_long
open Binary_error_types
let read_error_encoding =
let read_error_encoding : read_error Encoding.t =
let open Encoding in
union
[
......@@ -56,7 +45,7 @@ let read_error_encoding =
(Tag 2)
~title:"No case matched"
empty
(function No_case_matched -> Some () | _ -> None)
(function (No_case_matched : read_error) -> Some () | _ -> None)
(fun () -> No_case_matched);
case
(Tag 3)
......@@ -75,14 +64,20 @@ let read_error_encoding =
~title:"Invalid int"
(obj3 (req "min" int31) (req "v" int31) (req "max" int31))
(function
| Invalid_int {min; v; max} -> Some (min, v, max) | _ -> None)
| (Invalid_int {min; v; max} : read_error) ->
Some (min, v, max)
| _ ->
None)
(fun (min, v, max) -> Invalid_int {min; v; max});
case
(Tag 6)
~title:"Invalid float"
(obj3 (req "min" float) (req "v" float) (req "max" float))
(function
| Invalid_float {min; v; max} -> Some (min, v, max) | _ -> None)
| (Invalid_float {min; v; max} : read_error) ->
Some (min, v, max)
| _ ->
None)
(fun (min, v, max) -> Invalid_float {min; v; max});
case
(Tag 7)
......@@ -94,19 +89,19 @@ let read_error_encoding =
(Tag 8)
~title:"Size limit exceeded"
empty
(function Size_limit_exceeded -> Some () | _ -> None)
(function (Size_limit_exceeded : read_error) -> Some () | _ -> None)
(fun () -> Size_limit_exceeded);
case
(Tag 9)
~title:"List too long"
empty
(function List_too_long -> Some () | _ -> None)
(function (List_too_long : read_error) -> Some () | _ -> None)
(fun () -> List_too_long);
case
(Tag 10)
~title:"Array too long"
empty
(function Array_too_long -> Some () | _ -> None)
(function (Array_too_long : read_error) -> Some () | _ -> None)
(fun () -> Array_too_long);
]
......@@ -134,19 +129,6 @@ let pp_read_error ppf = function
| Array_too_long ->
Format.fprintf ppf "Array length limit exceeded"
exception Read_error of read_error
type write_error =
| Size_limit_exceeded
| No_case_matched
| Invalid_int of {min : int; v : int; max : int}
| Invalid_float of {min : float; v : float; max : float}
| Invalid_bytes_length of {expected : int; found : int}
| Invalid_string_length of {expected : int; found : int}
| Invalid_natural
| List_too_long
| Array_too_long
let write_error_encoding =
let open Encoding in
union
......@@ -244,5 +226,3 @@ let pp_write_error ppf = function
Format.fprintf ppf "List length limit exceeded"
| Array_too_long ->
Format.fprintf ppf "Array length limit exceeded"
exception Write_error of write_error
......@@ -26,38 +26,12 @@
(** This is for use *within* the data encoding library only. Instead, you should
use the corresponding module intended for use: {!Data_encoding.Binary}. *)
type read_error =
| Not_enough_data
| Extra_bytes
| No_case_matched
| Unexpected_tag of int
| Invalid_size of int
| Invalid_int of {min : int; v : int; max : int}
| Invalid_float of {min : float; v : float; max : float}
| Trailing_zero
| Size_limit_exceeded
| List_too_long
| Array_too_long
exception Read_error of read_error
open Binary_error_types
val read_error_encoding : read_error Encoding.t
val pp_read_error : Format.formatter -> read_error -> unit
type write_error =
| Size_limit_exceeded
| No_case_matched
| Invalid_int of {min : int; v : int; max : int}
| Invalid_float of {min : float; v : float; max : float}
| Invalid_bytes_length of {expected : int; found : int}
| Invalid_string_length of {expected : int; found : int}
| Invalid_natural
| List_too_long
| Array_too_long
val write_error_encoding : write_error Encoding.t
val pp_write_error : Format.formatter -> write_error -> unit
exception Write_error of write_error
(*****************************************************************************)
(* *)
(* Open Source License *)
(* Copyright (c) 2021 Nomadic Labs, <contact@nomadic-labs.com> *)
(* *)
(* Permission is hereby granted, free of charge, to any person obtaining a *)
(* copy of this software and associated documentation files (the "Software"),*)
(* to deal in the Software without restriction, including without limitation *)
(* the rights to use, copy, modify, merge, publish, distribute, sublicense, *)
(* and/or sell copies of the Software, and to permit persons to whom the *)
(* Software is furnished to do so, subject to the following conditions: *)
(* *)
(* The above copyright notice and this permission notice shall be included *)
(* in all copies or substantial portions of the Software. *)
(* *)
(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*)
(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *)
(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *)
(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*)
(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *)
(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *)
(* DEALINGS IN THE SOFTWARE. *)
(* *)
(*****************************************************************************)
type read_error =
| Not_enough_data
| Extra_bytes
| No_case_matched
| Unexpected_tag of int
| Invalid_size of int
| Invalid_int of {min : int; v : int; max : int}
| Invalid_float of {min : float; v : float; max : float}
| Trailing_zero
| Size_limit_exceeded
| List_too_long
| Array_too_long
exception Read_error of read_error
type write_error =
| Size_limit_exceeded
| No_case_matched
| Invalid_int of {min : int; v : int; max : int}
| Invalid_float of {min : float; v : float; max : float}
| Invalid_bytes_length of {expected : int; found : int}
| Invalid_string_length of {expected : int; found : int}
| Invalid_natural
| List_too_long
| Array_too_long
exception Write_error of write_error
(*****************************************************************************)
(* *)
(* Open Source License *)
(* Copyright (c) 2021 Nomadic Labs, <contact@nomadic-labs.com> *)
(* *)
(* Permission is hereby granted, free of charge, to any person obtaining a *)
(* copy of this software and associated documentation files (the "Software"),*)
(* to deal in the Software without restriction, including without limitation *)
(* the rights to use, copy, modify, merge, publish, distribute, sublicense, *)
(* and/or sell copies of the Software, and to permit persons to whom the *)
(* Software is furnished to do so, subject to the following conditions: *)
(* *)
(* The above copyright notice and this permission notice shall be included *)
(* in all copies or substantial portions of the Software. *)
(* *)
(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*)
(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *)
(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *)
(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*)
(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *)
(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *)
(* DEALINGS IN THE SOFTWARE. *)
(* *)
(*****************************************************************************)
(** This is for use *within* the data encoding library only. Instead, you should
use the corresponding module intended for use: {!Data_encoding.Binary}. *)
type read_error =
| Not_enough_data
| Extra_bytes
| No_case_matched
| Unexpected_tag of int
| Invalid_size of int
| Invalid_int of {min : int; v : int; max : int}
| Invalid_float of {min : float; v : float; max : float}
| Trailing_zero
| Size_limit_exceeded
| List_too_long
| Array_too_long
exception Read_error of read_error
type write_error =
| Size_limit_exceeded
| No_case_matched
| Invalid_int of {min : int; v : int; max : int}
| Invalid_float of {min : float; v : float; max : float}
| Invalid_bytes_length of {expected : int; found : int}
| Invalid_string_length of {expected : int; found : int}
| Invalid_natural
| List_too_long
| Array_too_long
exception Write_error of write_error
......@@ -23,7 +23,7 @@
(* *)
(*****************************************************************************)
open Binary_error
open Binary_error_types
let n_length value =
let bits = Z.numbits value in
......@@ -90,20 +90,6 @@ let rec length : type x. x Encoding.t -> x -> int =
| Tups {kind = `Dynamic; left; right} ->
let (v1, v2) = value in
length left v1 + length right v2
| Union {kind = `Dynamic; tag_size; cases} ->
let rec length_case = function
| [] ->
raise (Write_error No_case_matched)
| Case {tag = Json_only; _} :: tl ->
length_case tl
| Case {encoding = e; proj; _} :: tl -> (
match proj value with
| None ->
length_case tl
| Some value ->
Binary_size.tag_size tag_size + length e value )
in
length_case cases
| Mu {kind = `Dynamic; fix; _} ->
length (fix e) value
| Obj (Opt {kind = `Dynamic; encoding = e; _}) -> (
......@@ -131,22 +117,13 @@ let rec length : type x. x Encoding.t -> x -> int =
length left v1 + length right v2
| Obj (Opt {kind = `Variable; encoding = e; _}) -> (
match value with None -> 0 | Some value -> length e value )
| Union {kind = `Variable; tag_size; cases} ->
let rec length_case = function
| [] ->
raise (Write_error No_case_matched)
| Case {tag = Json_only; _} :: tl ->
length_case tl
| Case {encoding = e; proj; _} :: tl -> (
match proj value with
| None ->
length_case tl
| Some value ->
Binary_size.tag_size tag_size + length e value )
in
length_case cases
| Mu {kind = `Variable; fix; _} ->
length (fix e) value
(* Variable or Dynamic we don't care for those constructors *)
| Union {kind = `Dynamic | `Variable; tag_size; match_case; _} ->
let (Matched (tag, e, value)) = match_case value in
assert (tag <= Binary_size.max_int tag_size) ;
Binary_size.tag_size tag_size + length e value
(* Recursive*)
| Obj (Req {encoding = e; _}) ->
length e value
......@@ -177,9 +154,121 @@ let fixed_length e =
| `Dynamic | `Variable ->
None
let fixed_length_exn e =
match fixed_length e with
| Some n ->
n
| None ->
invalid_arg "Data_encoding.Binary.fixed_length_exn"
let rec maximum_length : type a. a Encoding.t -> int option =
fun e ->
let ( >>? ) = Option.bind in
let ( >|? ) x f = Option.map f x in
let open Encoding in
match e.encoding with
(* Fixed *)
| Null ->
Some 0
| Empty ->
Some 0
| Constant _ ->
Some 0
| Bool ->
Some Binary_size.bool
| Int8 ->
Some Binary_size.int8
| Uint8 ->
Some Binary_size.uint8
| Int16 ->
Some Binary_size.int16
| Uint16 ->
Some Binary_size.uint16
| Int31 ->
Some Binary_size.int31
| Int32 ->
Some Binary_size.int32
| Int64 ->
Some Binary_size.int64
| N ->
None
| Z ->
None
| RangedInt {minimum; maximum} ->
Some
( Binary_size.integer_to_size
@@ Binary_size.range_to_size ~minimum ~maximum )
| Float ->
Some Binary_size.float
| RangedFloat _ ->
Some Binary_size.float
| Bytes (`Fixed n) ->
Some n
| String (`Fixed n) ->
Some n
| Padded (e, n) ->
maximum_length e >|? fun s -> s + n
| String_enum (_, arr) ->
Some (Binary_size.integer_to_size @@ Binary_size.enum_size arr)
| Objs {kind = `Fixed n; _} ->
Some n
| Tups {kind = `Fixed n; _} ->
Some n
| Union {kind = `Fixed n; _} ->
Some n
(* Dynamic *)
| Obj (Opt {kind = `Dynamic; encoding = e; _}) ->
maximum_length e >|? fun s -> s + Binary_size.uint8
(* Variable *)
| Ignore ->
Some 0
| Bytes `Variable ->
None
| String `Variable ->
None
| Array (Some max_length, e) ->
maximum_length e >|? fun s -> s * max_length
| Array (None, _) ->
None
| List (Some max_length, e) ->
maximum_length e >|? fun s -> s * max_length
| List (None, _) ->
None
| Obj (Opt {kind = `Variable; encoding = e; _}) ->
maximum_length e
(* Variable or Dynamic we don't care for those constructors *)
| Union {kind = `Dynamic | `Variable; tag_size; cases; _} ->
List.fold_left
(fun acc (Case {encoding = e; _}) ->
acc >>? fun acc -> maximum_length e >|? fun s -> Stdlib.max acc s)
(Some 0)
cases
>|? fun s -> s + Binary_size.tag_size tag_size
| Objs {kind = `Dynamic | `Variable; left; right} ->
maximum_length left >>? fun l -> maximum_length right >|? fun r -> l + r
| Tups {kind = `Dynamic | `Variable; left; right} ->
maximum_length left >>? fun l -> maximum_length right >|? fun r -> l + r
| Mu _ ->
(* There could be bounded-size uses of Mu but it's unreasonable to expect
to detect them statically this way. Use `check_size` around the mu to
translate user-invariants into static encoding invariants *)
None
(* Recursive*)
| Obj (Req {encoding = e; _}) ->
maximum_length e
| Obj (Dft {encoding = e; _}) ->
maximum_length e
| Tup e ->
maximum_length e
| Conv {encoding = e; _} ->
maximum_length e
| Describe {encoding = e; _} ->
maximum_length e
| Splitted {encoding = e; _} ->
maximum_length e
| Dynamic_size {kind; encoding = e} ->
maximum_length e >|? fun s -> s + Binary_size.integer_to_size kind
| Check_size {limit; encoding = e} ->
(* NOTE: it is possible that the statically-provable maximum size exceeds
the dynamically checked limit. But the difference might be explained by
subtle invariants that do not appear in the encoding. *)
Some
(Option.fold
(maximum_length e)
~some:(fun s -> min s limit)
~none:limit)
| Delayed f ->
maximum_length (f ())
......@@ -30,7 +30,7 @@ val length : 'a Encoding.t -> 'a -> int
val fixed_length : 'a Encoding.t -> int option
val fixed_length_exn : 'a Encoding.t -> int
val maximum_length : 'a Encoding.t -> int option
val z_length : Z.t -> int
......
......@@ -23,12 +23,12 @@
(* *)
(*****************************************************************************)
open Binary_error
open Binary_error_types
let raise e = raise (Read_error e)
type state = {
buffer : Bytes.t;
buffer : string;
mutable offset : int;
mutable remaining_bytes : int;
mutable allowed_bytes : int option;
......@@ -56,32 +56,32 @@ let read_atom size conv state =
(** Reader for all the atomic types. *)
module Atom = struct
let uint8 = read_atom Binary_size.uint8 TzEndian.get_uint8
let uint8 = read_atom Binary_size.uint8 TzEndian.get_uint8_string
let uint16 = read_atom Binary_size.int16 TzEndian.get_uint16
let uint16 = read_atom Binary_size.int16 TzEndian.get_uint16_string
let int8 = read_atom Binary_size.int8 TzEndian.get_int8
let int8 = read_atom Binary_size.int8 TzEndian.get_int8_string
let int16 = read_atom Binary_size.int16 TzEndian.get_int16
let int16 = read_atom Binary_size.int16 TzEndian.get_int16_string
let int32 = read_atom Binary_size.int32 TzEndian.get_int32
let int32 = read_atom Binary_size.int32 TzEndian.get_int32_string
let int64 = read_atom Binary_size.int64 TzEndian.get_int64
let int64 = read_atom Binary_size.int64 TzEndian.get_int64_string
let float = read_atom Binary_size.float TzEndian.get_double
let float = read_atom Binary_size.float TzEndian.get_double_string
let bool state = int8 state <> 0
let uint30 =
read_atom Binary_size.uint30
@@ fun buffer ofs ->
let v = Int32.to_int (TzEndian.get_int32 buffer ofs) in
let v = Int32.to_int (TzEndian.get_int32_string buffer ofs) in
if v < 0 then raise (Invalid_int {min = 0; v; max = (1 lsl 30) - 1}) ;
v
let int31 =
read_atom Binary_size.int31
@@ fun buffer ofs -> Int32.to_int (TzEndian.get_int32 buffer ofs)
@@ fun buffer ofs -> Int32.to_int (TzEndian.get_int32_string buffer ofs)
let int = function
| `Int31 ->
......@@ -174,10 +174,11 @@ module Atom = struct
arr.(index)
let fixed_length_bytes length =
read_atom length @@ fun buf ofs -> Bytes.sub buf ofs length
read_atom length
@@ fun buf ofs -> Bytes.unsafe_of_string @@ String.sub buf ofs length
let fixed_length_string length =
read_atom length @@ fun buf ofs -> Bytes.sub_string buf ofs length
read_atom length @@ fun buf ofs -> String.sub buf ofs length
let tag = function `Uint8 -> uint8 | `Uint16 -> uint16
end
......@@ -237,11 +238,11 @@ let rec read_rec : type ret. ret Encoding.t -> state -> ret =
Atom.string_enum arr state
| Array (max_length, e) ->
let max_length = match max_length with Some l -> l | None -> max_int in
let l = read_list List_too_long max_length e state in
let l = read_list Array_too_long max_length e state in
Array.of_list l
| List (max_length, e) ->
let max_length = match max_length with Some l -> l | None -> max_int in
read_list Array_too_long max_length e state
read_list List_too_long max_length e state
| Obj (Req {encoding = e; _}) ->
read_rec e state
| Obj (Dft {encoding = e; _}) ->
......@@ -279,20 +280,12 @@ let rec read_rec : type ret. ret Encoding.t -> state -> ret =
read_variable_pair left right state
| Conv {inj; encoding; _} ->
inj (read_rec encoding state)
| Union {tag_size; cases; _} ->
| Union {tag_size; tagged_cases; _} ->
let ctag = Atom.tag tag_size state in
let (Case {encoding; inj; _}) =
try
List.find
(function
| Case {tag = Tag tag; _} ->
tag = ctag
| Case {tag = Json_only; _} ->
false)
cases
with Not_found -> raise (Unexpected_tag ctag)
in
inj (read_rec encoding state)
if ctag >= Array.length tagged_cases then raise (Unexpected_tag ctag) ;
let (Case {inj; encoding; _} as case) = tagged_cases.(ctag) in
if is_undefined_case case then raise (Unexpected_tag ctag)
else inj (read_rec encoding state)
| Dynamic_size {kind; encoding = e} ->
let sz = Atom.int kind state in
let remaining = check_remaining_bytes state sz in
......@@ -389,8 +382,8 @@ let read encoding buffer ofs len =
let read_opt encoding buffer ofs len =
try Some (read_exn encoding buffer ofs len) with Read_error _ -> None
let of_bytes_exn encoding buffer =
let len = Bytes.length buffer in
let of_string_exn encoding buffer =
let len = String.length buffer in
let state =
{buffer; offset = 0; remaining_bytes = len; allowed_bytes = None}
in
......@@ -398,8 +391,17 @@ let of_bytes_exn encoding buffer =
if state.offset <> len then raise Extra_bytes ;
v
let of_string encoding buffer =
try Ok (of_string_exn encoding buffer) with Read_error err -> Error err
let of_string_opt encoding buffer =
try Some (of_string_exn encoding buffer) with Read_error _ -> None
let of_bytes_exn encoding buffer =
of_string_exn encoding (Bytes.unsafe_to_string buffer)
let of_bytes encoding buffer =
try Ok (of_bytes_exn encoding buffer) with Read_error err -> Error err
of_string encoding (Bytes.unsafe_to_string buffer)
let of_bytes_opt encoding buffer =
try Some (of_bytes_exn encoding buffer) with Read_error _ -> None
of_string_opt encoding (Bytes.unsafe_to_string buffer)
......@@ -28,17 +28,25 @@
val read :
'a Encoding.t ->
Bytes.t ->
string ->
int ->
int ->
(int * 'a, Binary_error.read_error) result
(int * 'a, Binary_error_types.read_error) result
val read_opt : 'a Encoding.t -> Bytes.t -> int -> int -> (int * 'a) option
val read_opt : 'a Encoding.t -> string -> int -> int -> (int * 'a) option
val read_exn : 'a Encoding.t -> Bytes.t -> int -> int -> int * 'a
val read_exn : 'a Encoding.t -> string -> int -> int -> int * 'a
val of_bytes : 'a Encoding.t -> Bytes.t -> ('a, Binary_error.read_error) result
val of_bytes :
'a Encoding.t -> Bytes.t -> ('a, Binary_error_types.read_error) result
val of_bytes_opt : 'a Encoding.t -> Bytes.t -> 'a option
val of_bytes_exn : 'a Encoding.t -> Bytes.t -> 'a
val of_string :
'a Encoding.t -> string -> ('a, Binary_error_types.read_error) result
val of_string_opt : 'a Encoding.t -> string -> 'a option
val of_string_exn : 'a Encoding.t -> string -> 'a
......@@ -23,7 +23,7 @@
(* *)
(*****************************************************************************)
open Binary_error
open Binary_error_types
let raise e = raise (Read_error e)
......@@ -32,11 +32,11 @@ type state = {
stream : Binary_stream.t; (** All the remaining data to be read. *)
remaining_bytes : int option;
(** Total number of bytes that should be from 'stream' (None =
illimited). Reading less bytes should raise [Extra_bytes] and
unlimited). Reading less bytes should raise [Extra_bytes] and
trying to read more bytes should raise [Not_enough_data]. *)
allowed_bytes : int option;
(** Maximum number of bytes that are allowed to be read from 'stream'
before to fail (None = illimited). *)
before to fail (None = unlimited). *)
total_read : int;
(** Total number of bytes that has been read from [stream] since the
beginning. *)
......@@ -72,7 +72,7 @@ let check_allowed_bytes state size =
with the decoded value and the updated state.
The function [conv] is also allowed to raise [Read_error err].
In that case the exception is catched and [Error err] is returned.
In that case the exception is caught and [Error err] is returned.
If there is not enough [remaining_bytes] to be read in [state], the
function returns [Error Not_enough_data] instead of calling
......@@ -376,11 +376,8 @@ let rec read_rec :
@@ fun (ctag, state) ->
match
List.find_opt
(function
| Case {tag = Tag tag; _} ->
tag = ctag
| Case {tag = Json_only; _} ->
false)
(fun (Case {tag; _}) ->
Uint_option.fold tag ~none:false ~some:(fun tag -> tag = ctag))
cases
with
| None ->
......
......@@ -29,6 +29,6 @@
type 'ret status =
| Success of {result : 'ret; size : int; stream : Binary_stream.t}
| Await of (Bytes.t -> 'ret status)
| Error of Binary_error.read_error
| Error of Binary_error_types.read_error
val read_stream : ?init:Binary_stream.t -> 'a Encoding.t -> 'a status
......@@ -23,28 +23,36 @@
(* *)
(*****************************************************************************)
open Binary_error
open Binary_error_types
let raise error = raise (Write_error error)
let raise error = Stdlib.raise (Write_error error)
(** Imperative state of the binary writer. *)
type state = {
type writer_state = {
mutable buffer : Bytes.t; (** The buffer where to write. *)
mutable offset : int;
(** The offset of the next byte to be written in [buffer]. *)
mutable allowed_bytes : int option;
mutable allowed_bytes : Uint_option.t;
(** Maximum number of bytes that are allowed to be write in [buffer]
(after [offset]) before to fail (None = illimited). *)
(after [offset]) before to fail (unless it is [unlimited_bytes]). *)
}
let make_writer_state buffer ~offset ~allowed_bytes =
if allowed_bytes < 0 || allowed_bytes > Bytes.length buffer - offset then
None
else
let allowed_bytes = Uint_option.some allowed_bytes in
Some {buffer; offset; allowed_bytes}
let unlimited_bytes = Uint_option.is_none
let limited_bytes = Uint_option.is_some
let check_allowed_bytes state size =
match state.allowed_bytes with
| Some len when len < size ->
raise Size_limit_exceeded
| Some len ->
state.allowed_bytes <- Some (len - size)
| None ->
()
if limited_bytes state.allowed_bytes then (
let allowed_bytes = Uint_option.get state.allowed_bytes in
if allowed_bytes < size then raise Size_limit_exceeded ;
state.allowed_bytes <- Uint_option.(some (allowed_bytes - size)) )
(** [may_resize state size] will first ensure there is enough
space in [state.buffer] for writing [size] bytes (starting at
......@@ -212,7 +220,7 @@ module Atom = struct
end
(** Main recursive writing function. *)
let rec write_rec : type a. a Encoding.t -> state -> a -> unit =
let rec write_rec : type a. a Encoding.t -> writer_state -> a -> unit =
fun e state value ->
let open Encoding in
match e.encoding with
......@@ -295,25 +303,14 @@ let rec write_rec : type a. a Encoding.t -> state -> a -> unit =
write_rec left state v1 ; write_rec right state v2
| Conv {encoding = e; proj; _} ->
write_rec e state (proj value)
| Union {tag_size; cases; _} ->
let rec write_case = function
| [] ->
raise No_case_matched
| Case {tag = Json_only; _} :: tl ->
write_case tl
| Case {encoding = e; proj; tag = Tag tag; _} :: tl -> (
match proj value with
| None ->
write_case tl
| Some value ->
Atom.tag tag_size state tag ;
write_rec e state value )
in
write_case cases
| Union {tag_size; match_case; _} ->
let (Matched (tag, e, value)) = match_case value in
Atom.tag tag_size state tag ;
write_rec e state value
| Dynamic_size {kind; encoding = e} ->
let initial_offset = state.offset in
Atom.int kind state 0 ;
(* place holder for [size] *)
Atom.int kind state 0 ;
write_with_limit (Binary_size.max_int kind) e state value ;
(* patch the written [size] *)
Atom.set_int
......@@ -332,66 +329,99 @@ let rec write_rec : type a. a Encoding.t -> state -> a -> unit =
| Delayed f ->
write_rec (f ()) state value
and write_with_limit : type a. int -> a Encoding.t -> state -> a -> unit =
and write_with_limit : type a. int -> a Encoding.t -> writer_state -> a -> unit
=
fun limit e state value ->
(* backup the current limit *)
let old_limit = state.allowed_bytes in
(* install the new limit (only if smaller than the current limit) *)
let limit =
match state.allowed_bytes with
| None ->
limit
| Some old_limit ->
min old_limit limit
if unlimited_bytes state.allowed_bytes then limit
else
let old_limit = Uint_option.get state.allowed_bytes in
min old_limit limit
in
state.allowed_bytes <- Some limit ;
state.allowed_bytes <- Uint_option.some limit ;
write_rec e state value ;
(* restore the previous limit (minus the read bytes) *)
match old_limit with
| None ->
state.allowed_bytes <- None
| Some old_limit ->
let remaining =
match state.allowed_bytes with None -> assert false | Some len -> len
in
let read = limit - remaining in
state.allowed_bytes <- Some (old_limit - read)
if unlimited_bytes old_limit then state.allowed_bytes <- Uint_option.none
else
let remaining = Uint_option.get state.allowed_bytes in
let read = limit - remaining in
state.allowed_bytes <- Uint_option.(some (get old_limit - read))
(** ******************** *)
(** Various entry points *)
let write_exn e v buffer offset len =
(* By harcoding [allowed_bytes] with the buffer length,
we ensure that [write] will never reallocate the buffer. *)
let state = {buffer; offset; allowed_bytes = Some len} in
write_rec e state v ; state.offset
let write_exn e v state = write_rec e state v ; state.offset
let write e v buffer offset len =
try Ok (write_exn e v buffer offset len) with Write_error err -> Error err
let write e v state =
try Ok (write_exn e v state) with Write_error err -> Error err
let write_opt e v buffer offset len =
try Some (write_exn e v buffer offset len) with Write_error _ -> None
let write_opt e v state =
try Some (write_exn e v state) with Write_error _ -> None
let to_bytes_exn ?(buffer_size = 128) e v =
match Encoding.classify e with
| `Fixed n ->
(* Preallocate the complete buffer *)
let state =
{buffer = Bytes.create n; offset = 0; allowed_bytes = Some n}
{
buffer = Bytes.create n;
offset = 0;
allowed_bytes = Uint_option.some n;
}
in
write_rec e state v ; state.buffer
| `Dynamic | `Variable ->
(* Preallocate a minimal buffer and let's not hardcode a
limit to its extension. *)
let state =
{buffer = Bytes.create buffer_size; offset = 0; allowed_bytes = None}
{
buffer = Bytes.create buffer_size;
offset = 0;
allowed_bytes = Uint_option.none;
}
in
write_rec e state v ;
Bytes.sub state.buffer 0 state.offset
let to_bytes_opt ?buffer_size e v =
Option.iter
(fun buffer_size ->
if buffer_size < 0 then
Stdlib.raise
(Invalid_argument
"Data_encoding.Binary_writer.to_bytes_opt: negative length"))
buffer_size ;
try Some (to_bytes_exn ?buffer_size e v) with Write_error _ -> None
let to_bytes ?buffer_size e v =
Option.iter
(fun buffer_size ->
if buffer_size < 0 then
Stdlib.raise
(Invalid_argument
"Data_encoding.Binary_writer.to_bytes: negative length"))
buffer_size ;
try Ok (to_bytes_exn ?buffer_size e v) with Write_error err -> Error err
let to_bytes_exn ?buffer_size e v =
Option.iter
(fun buffer_size ->
if buffer_size < 0 then
Stdlib.raise
(Invalid_argument
"Data_encoding.Binary_writer.to_bytes: negative length"))
buffer_size ;
to_bytes_exn ?buffer_size e v
let to_string_opt ?buffer_size e v =
Option.map Bytes.unsafe_to_string (to_bytes_opt ?buffer_size e v)
let to_string ?buffer_size e v =
Result.map Bytes.unsafe_to_string (to_bytes ?buffer_size e v)
let to_string_exn ?buffer_size e v =
Bytes.unsafe_to_string (to_bytes_exn ?buffer_size e v)
......@@ -26,24 +26,37 @@
(** This is for use *within* the data encoding library only. Instead, you should
use the corresponding module intended for use: {!Data_encoding.Binary}. *)
type writer_state
val make_writer_state :
bytes -> offset:int -> allowed_bytes:int -> writer_state option
val write :
'a Encoding.t ->
'a ->
Bytes.t ->
int ->
int ->
(int, Binary_error.write_error) result
writer_state ->
(int, Binary_error_types.write_error) result
val write_opt : 'a Encoding.t -> 'a -> Bytes.t -> int -> int -> int option
val write_opt : 'a Encoding.t -> 'a -> writer_state -> int option
val write_exn : 'a Encoding.t -> 'a -> Bytes.t -> int -> int -> int
val write_exn : 'a Encoding.t -> 'a -> writer_state -> int
val to_bytes :
?buffer_size:int ->
'a Encoding.t ->
'a ->
(Bytes.t, Binary_error.write_error) result
(Bytes.t, Binary_error_types.write_error) result
val to_bytes_opt : ?buffer_size:int -> 'a Encoding.t -> 'a -> Bytes.t option
val to_bytes_exn : ?buffer_size:int -> 'a Encoding.t -> 'a -> Bytes.t
val to_string :
?buffer_size:int ->
'a Encoding.t ->
'a ->
(string, Binary_error_types.write_error) result
val to_string_opt : ?buffer_size:int -> 'a Encoding.t -> 'a -> string option
val to_string_exn : ?buffer_size:int -> 'a Encoding.t -> 'a -> string
(**************************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1996 Institut National de Recherche en Informatique et *)
(* en Automatique. *)
(* *)
(* All rights reserved. This file is distributed under the terms of *)
(* the GNU Lesser General Public License version 2.1, with the *)
(* special exception on linking described in the file LICENSE. *)
(* *)
(**************************************************************************)
(* this module is a temporary fix waiting for ocaml 4.08 *)
(** {6 Binary encoding/decoding of integers} *)
external get_uint8 : bytes -> int -> int = "%bytes_safe_get"
external get_uint16_ne : bytes -> int -> int = "%caml_bytes_get16"
external get_int32_ne : bytes -> int -> int32 = "%caml_bytes_get32"
external get_int64_ne : bytes -> int -> int64 = "%caml_bytes_get64"
external set_int8 : bytes -> int -> int -> unit = "%bytes_safe_set"
external set_int16_ne : bytes -> int -> int -> unit = "%caml_bytes_set16"
external set_int32_ne : bytes -> int -> int32 -> unit = "%caml_bytes_set32"
external set_int64_ne : bytes -> int -> int64 -> unit = "%caml_bytes_set64"
external swap16 : int -> int = "%bswap16"
external swap32 : int32 -> int32 = "%bswap_int32"
external swap64 : int64 -> int64 = "%bswap_int64"
let get_int8 b i = (get_uint8 b i lsl (Sys.int_size - 8)) asr (Sys.int_size - 8)
let get_uint16_le b i =
if Sys.big_endian then swap16 (get_uint16_ne b i) else get_uint16_ne b i
let get_uint16_be b i =
if not Sys.big_endian then swap16 (get_uint16_ne b i) else get_uint16_ne b i
let get_int16_ne b i =
(get_uint16_ne b i lsl (Sys.int_size - 16)) asr (Sys.int_size - 16)
let get_int16_le b i =
(get_uint16_le b i lsl (Sys.int_size - 16)) asr (Sys.int_size - 16)
let get_int16_be b i =
(get_uint16_be b i lsl (Sys.int_size - 16)) asr (Sys.int_size - 16)
let get_int32_le b i =
if Sys.big_endian then swap32 (get_int32_ne b i) else get_int32_ne b i
let get_int32_be b i =
if not Sys.big_endian then swap32 (get_int32_ne b i) else get_int32_ne b i
let get_int64_le b i =
if Sys.big_endian then swap64 (get_int64_ne b i) else get_int64_ne b i
let get_int64_be b i =
if not Sys.big_endian then swap64 (get_int64_ne b i) else get_int64_ne b i
let set_int16_le b i x =
if Sys.big_endian then set_int16_ne b i (swap16 x) else set_int16_ne b i x
let set_int16_be b i x =
if not Sys.big_endian then set_int16_ne b i (swap16 x)
else set_int16_ne b i x
let set_int32_le b i x =
if Sys.big_endian then set_int32_ne b i (swap32 x) else set_int32_ne b i x
let set_int32_be b i x =
if not Sys.big_endian then set_int32_ne b i (swap32 x)
else set_int32_ne b i x
let set_int64_le b i x =
if Sys.big_endian then set_int64_ne b i (swap64 x) else set_int64_ne b i x
let set_int64_be b i x =
if not Sys.big_endian then set_int64_ne b i (swap64 x)
else set_int64_ne b i x
let set_uint8 = set_int8
let set_uint16_ne = set_int16_ne
let set_uint16_be = set_int16_be
let set_uint16_le = set_int16_le
module type S = sig
(** {1 Binary encoding/decoding of integers} *)
(** The functions in this section binary encode and decode integers to
and from byte sequences.
All following functions raise [Invalid_argument] if the space
needed at index [i] to decode or encode the integer is not
available.
Little-endian (resp. big-endian) encoding means that least
(resp. most) significant bytes are stored first. Big-endian is
also known as network byte order. Native-endian encoding is
either little-endian or big-endian depending on {!Sys.big_endian}.
32-bit and 64-bit integers are represented by the [int32] and
[int64] types, which can be interpreted either as signed or
unsigned numbers.
8-bit and 16-bit integers are represented by the [int] type,
which has more bits than the binary encoding. These extra bits
are handled as follows: {ul
{- Functions that decode signed (resp. unsigned) 8-bit or 16-bit
integers represented by [int] values sign-extend
(resp. zero-extend) their result.}
{- Functions that encode 8-bit or 16-bit integers represented by
[int] values truncate their input to their least significant
bytes.}}
*)
(** [get_uint8 b i] is [b]'s unsigned 8-bit integer starting at byte index [i].
@since 4.08