diff --git a/.github/workflows/ocaml.yml b/.github/workflows/ocaml.yml index f19f282..ffa1867 100644 --- a/.github/workflows/ocaml.yml +++ b/.github/workflows/ocaml.yml @@ -62,6 +62,8 @@ jobs: run: echo "ref=$(echo ${GITHUB_REF#refs/*/})" >> ${GITHUB_OUTPUT} - uses: actions/upload-artifact@v4 + if: github.event_name == 'push' && + ( github.ref == 'refs/heads/main' || startsWith(github.ref,'refs/tags') ) with: name: doc-${{ steps.vars.outputs.ref }} path: _build/default/_doc/_html/patricia-tree/ diff --git a/CHANGELOG.md b/CHANGELOG.md index eea5ebc..9262dad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,16 @@ # Unreleased +- Patricia Tree now support using negative keys. Tree are built using the bitwise representation + of integer, meaning they effectively use an unsigned order. Negative keys are + considered bigger than positive keys, `0` is the minimal number and `-1` the maximal one. +- Renamed `min_binding`, `max_binding`, `pop_minimum`, `pop_maximum`, `min_elt` + and `max_elt` to `unsigned_min_binding`, `unsigned_max_binding`, + `pop_unsigned_minimum`, `pop_unsigned_maximum`, `unsigned_min_elt` + and `unsigned_max_elt` respectively, to clarify that these functions consider + negative numbers as larger than positive ones. - Fixed a bug where NodeWithId wasn't incrementing ids properly +- `zarith` is no longer a dependency, used GCC's `__builtin_clz` as a faster + method of finding an integer's highest bit. # v0.9.0 - 2024-04-18 diff --git a/README.md b/README.md index 9454a87..7016594 100644 --- a/README.md +++ b/README.md @@ -59,8 +59,7 @@ dune build @doc and the same convention for order of arguments. This should allow switching to and from Patricia Tree with minimal effort. - The functor parameters (`KEY` module) requires an injective `to_int : t -> int` - function instead of a `compare` function. `to_int` should be fast, injective, - and only return positive integers. + function instead of a `compare` function. `to_int` should be fast and injective. This works well with [hash-consed](https://en.wikipedia.org/wiki/Hash_consing) types. - The Patricia Tree representation is stable, contrary to maps, inserting nodes in any order will return the same shape. @@ -77,13 +76,18 @@ dune build @doc for the general one) - Since our Patricia Tree use big-endian order on keys, the maps and sets are - sorted in increasing order of keys. We only support positive integer keys. + sorted in increasing **unsigned order** of keys. + This means negative keys are sorted above positive keys, with `-1` being the + largest possible key, and `0` the smallest. This also avoids a bug in Okasaki's paper discussed in [*QuickChecking Patricia Trees*](https://www.cs.tufts.edu/comp/150FP/archive/jan-midtgaard/qc-patricia.pdf) by Jan Mitgaard. + + It also affects functions like `unsigned_min_binding` and `pop_unsigned_minimum`. They will return the smallest + positive integer of both positive and negative keys are present; and not the smallest negative, as one might expect. - Supports generic maps and sets: a `'m map` that maps `'k key` to `('k, 'm) value`. This is especially useful when using [GADTs](https://v2.ocaml.org/manual/gadts-tutorial.html) for the type of keys. This is also sometimes called a dependent map. -- Allows easy and fast operations across different types of maps and set (e.g. - an intersection between a map and a set), since all sets and maps, no matter their key type, are really positive integer sets or maps. +- Allows easy and fast operations across different types of maps and set + which have the same type of keys (e.g. an intersection between a map and a set). - Multiple choices for internal representation (`NODE`), which allows for efficient storage (no need to store a value for sets), or using weak nodes only (values removed from the tree if no other pointer to it exists). This system can also be extended to store size information in nodes if needed. @@ -294,7 +298,6 @@ These are smaller and closer to OCaml's built-in Map and Set, however: - These libraries work with older version of OCaml (`>= 4.05` I believe), whereas ours requires OCaml `>= 4.14` (for the new interface of `Ephemeron` used in `WeakNode`). -- Our keys are limited to positive integers. ### dmap diff --git a/dune b/dune index 17e5fd7..1285ad0 100644 --- a/dune +++ b/dune @@ -22,8 +22,10 @@ (library (name PatriciaTree) (public_name patricia-tree) - (libraries zarith) - (modules PatriciaTree)) + (modules PatriciaTree) + (foreign_stubs + (language c) + (names int_builtins))) (documentation (package patricia-tree)) @@ -36,5 +38,5 @@ (libraries qcheck-core)) (preprocess (pps ppx_inline_test)) - (libraries PatriciaTree zarith qcheck-core) + (libraries PatriciaTree qcheck-core) (modules PatriciaTreeTest)) diff --git a/dune-project b/dune-project index 91a1a26..621fb83 100644 --- a/dune-project +++ b/dune-project @@ -52,8 +52,6 @@ (depends (ocaml (>= 4.14)) - (zarith - (>= "1.13")) dune (qcheck-core (and diff --git a/index.mld b/index.mld index 49880be..1085e0d 100644 --- a/index.mld +++ b/index.mld @@ -40,12 +40,13 @@ dune build @doc {1 Features} {ul -{li Similar to OCaml's [Map] and [Set], using the same function names when possible +{li Similar to OCaml's {{: https://ocaml.org/api/Map.S.html}[Map]} and {{: https://ocaml.org/api/Set.S.html}[Set]}, + using the same function names when possible and the same convention for order of arguments. This should allow switching to and from Patricia Tree with minimal effort.} {li The functor parameters ({!PatriciaTree.KEY} module) requires an injective [to_int : t -> int] function instead of a [compare] function. {!PatriciaTree.KEY.to_int} should be fast, - injective, and only return positive integers. + and injective. This works well with {{: https://en.wikipedia.org/wiki/Hash_consing}hash-consed} types.} {li The Patricia Tree representation is stable, contrary to maps, inserting nodes in any order will return the same shape. @@ -61,16 +62,22 @@ dune build @doc [idempotent_inter] for the efficient version and [nonidempotent_inter_no_share] for the general one)} {li Since our Patricia Tree use big-endian order on keys, the maps and sets are - sorted in increasing order of keys. We only support positive integer keys. + sorted in increasing {b {{!PatriciaTree.unsigned_lt}unsigned order}} of keys. + This means negative keys are sorted above positive keys, with [-1] being the + largest possible key, and [0] the smallest. This also avoids a bug in Okasaki's paper discussed in {{: https://www.cs.tufts.edu/comp/150FP/archive/jan-midtgaard/qc-patricia.pdf}{i QuickChecking Patricia Trees}} - by Jan Mitgaard.} + by Jan Mitgaard. + + It also affects functions like {{!PatriciaTree.BASE_MAP.unsigned_min_binding}[unsigned_min_binding]} + and {{!PatriciaTree.BASE_MAP.pop_unsigned_minimum}[pop_unsigned_minimum}. They will return the smallest + positive integer of both positive and negative keys are present; and not the smallest negative, + as one might expect.} {li Supports generic maps and sets: a ['m map] that maps ['k key] to [('k, 'm) value]. This is especially useful when using {{: https://v2.ocaml.org/manual/gadts-tutorial.html}GADTs} for the type of keys. This is also sometimes called a dependent map.} -{li Allows easy and fast operations across different types of maps and set (e.g. - an intersection between a map and a set), since all sets and maps, no matter their key type, - are really positive integer sets or maps.} +{li Allows easy and fast operations across different types of maps and set + which have the same type of keys (e.g. an intersection between a map and a set).} {li Multiple choices for internal representation ({!PatriciaTree.NODE}), which allows for efficient storage (no need to store a value for sets), or using weak nodes only (values removed from the tree if no other pointer to it exists). This system can also be extended to store size information in nodes if needed.} @@ -292,7 +299,6 @@ These are smaller and closer to OCaml's built-in [Map] and [Set], however: - These libraries work with older version of OCaml ([>= 4.05] I believe), whereas ours requires OCaml [>= 4.14] (for the new interface of [Ephemeron] used in {!PatriciaTree.WeakNode}). -- Our keys are limited to positive integers. {2 dmap} diff --git a/int_builtins.c b/int_builtins.c new file mode 100644 index 0000000..3ed37b3 --- /dev/null +++ b/int_builtins.c @@ -0,0 +1,65 @@ +/**************************************************************************/ +/* This file is part of the Codex semantics library. */ +/* */ +/* Copyright (C) 2013-2024 */ +/* CEA (Commissariat à l'énergie atomique et aux énergies */ +/* alternatives) */ +/* */ +/* you can redistribute it and/or modify it under the terms of the GNU */ +/* Lesser General Public License as published by the Free Software */ +/* Foundation, version 2.1. */ +/* */ +/* It is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ +/* GNU Lesser General Public License for more details. */ +/* */ +/* See the GNU Lesser General Public License version 2.1 */ +/* for more details (enclosed in the file LICENSE). */ +/* */ +/**************************************************************************/ + +#define CAML_NAME_SPACE +#include +#include +#include + +#ifdef _MSC_VER +#include +#endif + +__attribute__((__always_inline__)) +static inline uintnat clz(uintnat v){ + /* Note: on a 64 bit platform, GCC's _builtin_clz will perform a 32 + bit operation (even if the argument has type int). We have to use + _builtin_clzll instead. */ +#if __GNUC__ + #ifdef ARCH_SIXTYFOUR + return __builtin_clzll(v); + #else + return __builtin_clz(v) + #endif +#endif +#ifdef _MSC_VER + int res = 0; + #ifdef ARCH_SIXTYFOUR + _BitScanReverse64(&res,v); + #else + _BitScanReverse(&res,v); + #endif + return res; +#endif +} + +/**************** Highest bit ****************/ + +CAMLprim uintnat caml_int_builtin_highest_bit (value i){ + /* printf("Highest bit In C: %x %x %x %x\n", */ + /* i, i >> 1, 62-clz(i), 1 << (62 - clz(i))); */ + /* fflush(stdout); */ + return ((uintnat) 1 << (8*sizeof(value) - 2 - clz(i))); +} + +CAMLprim value caml_int_builtin_highest_bit_byte (value i){ + return Val_int(caml_int_builtin_highest_bit(i)); +} diff --git a/patricia-tree.opam b/patricia-tree.opam index a5161d6..d47812a 100644 --- a/patricia-tree.opam +++ b/patricia-tree.opam @@ -15,7 +15,6 @@ bug-reports: "https://github.com/codex-semantics-library/patricia-tree/issues" depends: [ "ocaml" {>= "4.14"} - "zarith" {>= "1.13"} "dune" {>= "2.7"} "qcheck-core" {>= "0.21.2" & with-test} "ppx_inline_test" {>= "v0.16.0" & with-test} diff --git a/patriciaTree.ml b/patriciaTree.ml index 5fae0e5..65621f1 100644 --- a/patriciaTree.ml +++ b/patriciaTree.ml @@ -58,8 +58,8 @@ module type BASE_MAP = sig type 'map key_value_pair = KeyValue : 'a key * ('a, 'map) value -> 'map key_value_pair - val min_binding : 'a t -> 'a key_value_pair - val max_binding : 'a t -> 'a key_value_pair + val unsigned_min_binding : 'a t -> 'a key_value_pair + val unsigned_max_binding : 'a t -> 'a key_value_pair val singleton : 'a key -> ('a, 'b) value -> 'b t val cardinal : 'a t -> int val is_singleton : 'a t -> 'a key_value_pair option @@ -67,8 +67,8 @@ module type BASE_MAP = sig val find_opt : 'key key -> 'map t -> ('key, 'map) value option val mem : 'key key -> 'map t -> bool val remove : 'key key -> 'map t -> 'map t - val pop_minimum: 'map t -> ('map key_value_pair * 'map t) option - val pop_maximum: 'map t -> ('map key_value_pair * 'map t) option + val pop_unsigned_minimum: 'map t -> ('map key_value_pair * 'map t) option + val pop_unsigned_maximum: 'map t -> ('map key_value_pair * 'map t) option val insert: 'a key -> (('a,'map) value option -> ('a,'map) value) -> 'map t -> 'map t val update: 'a key -> (('a,'map) value option -> ('a,'map) value option) -> 'map t -> 'map t @@ -186,10 +186,10 @@ module type HETEROGENEOUS_SET = sig val cardinal: t -> int val is_singleton: t -> any_elt option val remove: 'a elt -> t -> t - val min_elt: t -> any_elt - val max_elt: t -> any_elt - val pop_minimum: t -> (any_elt * t) option - val pop_maximum: t -> (any_elt * t) option + val unsigned_min_elt: t -> any_elt + val unsigned_max_elt: t -> any_elt + val pop_unsigned_minimum: t -> (any_elt * t) option + val pop_unsigned_maximum: t -> (any_elt * t) option val union: t -> t -> t val inter: t -> t -> t val disjoint: t -> t -> bool @@ -241,10 +241,10 @@ module type SET = sig val cardinal: t -> int val is_singleton: t -> elt option val remove: elt -> t -> t - val min_elt: t -> elt - val max_elt: t -> elt - val pop_minimum: t -> (elt * t) option - val pop_maximum: t -> (elt * t) option + val unsigned_min_elt: t -> elt + val unsigned_max_elt: t -> elt + val pop_unsigned_minimum: t -> (elt * t) option + val pop_unsigned_maximum: t -> (elt * t) option val iter: (elt -> unit) -> t -> unit val filter: (elt -> bool) -> t -> t val for_all: (elt -> bool) -> t -> bool @@ -282,8 +282,8 @@ module type MAP = sig val empty : 'a t val is_empty : 'a t -> bool - val min_binding : 'a t -> (key * 'a) - val max_binding : 'a t -> (key * 'a) + val unsigned_min_binding : 'a t -> (key * 'a) + val unsigned_max_binding : 'a t -> (key * 'a) val singleton : key -> 'a -> 'a t val cardinal : 'a t -> int val is_singleton : 'a t -> (key * 'a) option @@ -291,8 +291,8 @@ module type MAP = sig val find_opt : key -> 'a t -> 'a option val mem : key -> 'a t -> bool val remove : key -> 'a t -> 'a t - val pop_minimum : 'a t -> (key * 'a * 'a t) option - val pop_maximum : 'a t -> (key * 'a * 'a t) option + val pop_unsigned_minimum : 'a t -> (key * 'a * 'a t) option + val pop_unsigned_maximum : 'a t -> (key * 'a * 'a t) option val insert : key -> ('a option -> 'a) -> 'a t -> 'a t val update : key -> ('a option -> 'a option) -> 'a t -> 'a t val add : key -> 'a -> 'a t -> 'a t @@ -368,42 +368,22 @@ end (** {1 Utility functions} *) -(** Optimized computation, but does not work for values too high. *) -(* let _highest_bit v = - (* compute highest bit. - First, set all bits with weight less than - the highest set bit *) - let v1 = v lsr 1 in - let v2 = v lsr 2 in - let v = v lor v1 in - let v = v lor v2 in - let v1 = v lsr 3 in - let v2 = v lsr 6 in - let v = v lor v1 in - let v = v lor v2 in - let v1 = v lsr 9 in - let v2 = v lsr 18 in - let v = v lor v1 in - let v = v lor v2 in - (* then get highest bit *) - (succ v) lsr 1 - -let lowest_bit x = - x land (-x) *) - -(* let rec _highest_bit x = - let m = lowest_bit x in - if x = m then - m - else - _highest_bit (x - m) *) - -let highest_bit x = - 1 lsl (Z.log2 @@ Z.of_int x) +(** Fast highest bit computation in c, using GCC's __builtin_clz + which compile to efficient instruction (bsr) when possible. *) +external highest_bit: int -> (int[@untagged]) = + "caml_int_builtin_highest_bit_byte" "caml_int_builtin_highest_bit" [@@noalloc] + +let unsigned_lt x y = x - min_int < y - min_int + (* if x >= 0 && y >= 0 + then x < y + else if x >= 0 + then (* pos < neg *) true + else if y >= 0 then false + else x < y *) (** Note: in the original version, okasaki give the masks as arguments to optimize the computation of highest_bit. *) -let branching_bit a b = highest_bit (a lxor b);; +let branching_bit a b = highest_bit (a lxor b) let mask i m = i land (lnot (2*m-1)) @@ -610,14 +590,14 @@ module MakeCustomHeterogeneous include NODE type 'map key_value_pair = KeyValue: 'a Key.t * ('a,'map) value -> 'map key_value_pair - let rec min_binding x = match NODE.view x with + let rec unsigned_min_binding x = match NODE.view x with | Empty -> raise Not_found | Leaf{key;value} -> KeyValue(key,value) - | Branch{tree0;_} -> min_binding tree0 - let rec max_binding x = match NODE.view x with + | Branch{tree0;_} -> unsigned_min_binding tree0 + let rec unsigned_max_binding x = match NODE.view x with | Empty -> raise Not_found | Leaf{key;value} -> KeyValue(key,value) - | Branch{tree1;_} -> max_binding tree1 + | Branch{tree1;_} -> unsigned_max_binding tree1 (* Merge trees whose prefix disagree. *) @@ -668,12 +648,12 @@ module MakeCustomHeterogeneous match Key.polyeq key split_key with | Eq -> NODE.empty, Some value, NODE.empty | Diff -> - if Key.to_int key < split_key_int then + if unsigned_lt (Key.to_int key) split_key_int then m, None, NODE.empty else NODE.empty, None, m end | Branch{prefix;branching_bit;tree0;tree1} -> if not (match_prefix split_key_int prefix branching_bit) then - if prefix < split_key_int + if unsigned_lt prefix split_key_int then m, None, NODE.empty else NODE.empty, None, m else if (branching_bit land split_key_int == 0) then @@ -790,8 +770,8 @@ module MakeCustomHeterogeneous in (res,restree) | Empty -> (* Can only happen in weak sets and maps. *) - raise Disappeared ;; - let pop_minimum m = match NODE.view m with + raise Disappeared + let pop_unsigned_minimum m = match NODE.view m with | Empty -> None | _ -> Some(pop_min_nonempty m) @@ -806,7 +786,7 @@ module MakeCustomHeterogeneous (* Can only happen in weak sets and maps. *) | Empty -> raise Disappeared - let pop_maximum m = match NODE.view m with + let pop_unsigned_maximum m = match NODE.view m with | Empty -> None | _ -> Some(pop_max_nonempty m) @@ -972,7 +952,7 @@ module MakeCustomHeterogeneous (reflexive_subset_domain_for_all2 f ta0 tb0) && (reflexive_subset_domain_for_all2 f ta1 tb1) (* Case where ta have to be included in one of tb0 or tb1. *) - else if ma < mb && match_prefix pa pb mb + else if unsigned_lt ma mb && match_prefix pa pb mb then if mb land pa == 0 then reflexive_subset_domain_for_all2 f ta tb0 else reflexive_subset_domain_for_all2 f ta tb1 @@ -990,11 +970,11 @@ module MakeCustomHeterogeneous if ma == mb && pa == pb (* Same prefix: check both subtrees *) then disjoint ta0 tb0 && disjoint ta1 tb1 - else if ma > mb && match_prefix pb pa ma (* tb included in ta0 or ta1 *) + else if unsigned_lt mb ma && match_prefix pb pa ma (* tb included in ta0 or ta1 *) then if ma land pb == 0 then disjoint ta0 tb else disjoint ta1 tb - else if ma < mb && match_prefix pa pb mb (* ta included in tb0 or tb1 *) + else if unsigned_lt ma mb && match_prefix pa pb mb (* ta included in tb0 or tb1 *) then if mb land pa == 0 then disjoint ta tb0 else disjoint ta tb1 @@ -1019,11 +999,11 @@ module MakeCustomHeterogeneous let tree0 = idempotent_union f ta0 tb0 in let tree1 = idempotent_union f ta1 tb1 in branch ~prefix:pa ~branching_bit:ma ~tree0 ~tree1 - else if ma > mb && match_prefix pb pa ma + else if unsigned_lt mb ma && match_prefix pb pa ma then if ma land pb == 0 then branch ~prefix:pa ~branching_bit:ma ~tree0:(idempotent_union f ta0 tb) ~tree1:ta1 else branch ~prefix:pa ~branching_bit:ma ~tree0:ta0 ~tree1:(idempotent_union f ta1 tb) - else if ma < mb && match_prefix pa pb mb + else if unsigned_lt ma mb && match_prefix pa pb mb then if mb land pa == 0 then branch ~prefix:pb ~branching_bit:mb ~tree0:(idempotent_union f ta tb0) ~tree1:tb1 else branch ~prefix:pb ~branching_bit:mb ~tree0:tb0 ~tree1:(idempotent_union f ta tb1) @@ -1056,11 +1036,11 @@ module MakeCustomHeterogeneous let tree0 = idempotent_inter f ta0 tb0 in let tree1 = idempotent_inter f ta1 tb1 in branch ~prefix:pa ~branching_bit:ma ~tree0 ~tree1 - else if ma > mb && match_prefix pb pa ma + else if unsigned_lt mb ma && match_prefix pb pa ma then if ma land pb == 0 then idempotent_inter f ta0 tb else idempotent_inter f ta1 tb - else if ma < mb && match_prefix pa pb mb + else if unsigned_lt ma mb && match_prefix pa pb mb then if mb land pa == 0 then idempotent_inter f ta tb0 else idempotent_inter f ta tb1 @@ -1086,11 +1066,11 @@ module MakeCustomHeterogeneous let tree0 = nonidempotent_inter_no_share f ta0 tb0 in let tree1 = nonidempotent_inter_no_share f ta1 tb1 in branch ~prefix:pa ~branching_bit:ma ~tree0 ~tree1 - else if ma > mb && match_prefix pb pa ma + else if unsigned_lt mb ma && match_prefix pb pa ma then if ma land pb == 0 then nonidempotent_inter_no_share f ta0 tb else nonidempotent_inter_no_share f ta1 tb - else if ma < mb && match_prefix pa pb mb + else if unsigned_lt ma mb && match_prefix pa pb mb then if mb land pa == 0 then nonidempotent_inter_no_share f ta tb0 else nonidempotent_inter_no_share f ta tb1 @@ -1125,11 +1105,11 @@ module MakeCustomHeterogeneous let tree0 = idempotent_inter_filter f ta0 tb0 in let tree1 = idempotent_inter_filter f ta1 tb1 in branch ~prefix:pa ~branching_bit:ma ~tree0 ~tree1 - else if ma > mb && match_prefix pb pa ma + else if unsigned_lt mb ma && match_prefix pb pa ma then if ma land pb == 0 then idempotent_inter_filter f ta0 tb else idempotent_inter_filter f ta1 tb - else if ma < mb && match_prefix pa pb mb + else if unsigned_lt ma mb && match_prefix pa pb mb then if mb land pa == 0 then idempotent_inter_filter f ta tb0 else idempotent_inter_filter f ta tb1 @@ -1180,11 +1160,11 @@ module MakeCustomHeterogeneous (* Same prefix: merge the subtrees *) then branch ~prefix:pa ~branching_bit:ma ~tree0:(slow_merge f ta0 tb0) ~tree1:(slow_merge f ta1 tb1) - else if ma > mb && match_prefix pb pa ma + else if unsigned_lt mb ma && match_prefix pb pa ma then if ma land pb == 0 then branch ~prefix:pa ~branching_bit:ma ~tree0:(slow_merge f ta0 tb) ~tree1:(upd_ta ta1) else branch ~prefix:pa ~branching_bit:ma ~tree0:(upd_ta ta0) ~tree1:(slow_merge f ta1 tb) - else if ma < mb && match_prefix pa pb mb + else if unsigned_lt ma mb && match_prefix pa pb mb then if mb land pa == 0 then branch ~prefix:pb ~branching_bit:mb ~tree0:(slow_merge f ta tb0) ~tree1:(upd_tb tb1) else branch ~prefix:pb ~branching_bit:mb ~tree0:(upd_tb tb0) ~tree1:(slow_merge f ta tb1) @@ -1240,11 +1220,11 @@ module MakeCustomHeterogeneous if(ta0 == tree0 && ta1 == tree1) then ta else NODE.branch ~prefix:pa ~branching_bit:ma ~tree0 ~tree1 - else if ma > mb && match_prefix pb pa ma + else if unsigned_lt mb ma && match_prefix pb pa ma then if ma land pb == 0 then nonidempotent_inter f ta0 tb else nonidempotent_inter f ta1 tb - else if ma < mb && match_prefix pa pb mb + else if unsigned_lt ma mb && match_prefix pa pb mb then if mb land pa == 0 then nonidempotent_inter f ta tb0 else nonidempotent_inter f ta tb1 @@ -1287,7 +1267,7 @@ module MakeCustomHeterogeneous let tree1 = update_multiple_from_foreign tb1 f ta1 in if tree0 == ta0 && tree1 == ta1 then ta else branch ~prefix:pa ~branching_bit:ma ~tree0 ~tree1 - else if ma > mb && match_prefix pb pa ma + else if unsigned_lt mb ma && match_prefix pb pa ma then if ma land pb == 0 then let ta0' = update_multiple_from_foreign tb f ta0 in @@ -1297,7 +1277,7 @@ module MakeCustomHeterogeneous let ta1' = update_multiple_from_foreign tb f ta1 in if ta1' == ta1 then ta else branch ~prefix:pa ~branching_bit:ma ~tree0:ta0 ~tree1:ta1' - else if ma < mb && match_prefix pa pb mb + else if unsigned_lt ma mb && match_prefix pa pb mb then if mb land pa == 0 then let tree0 = update_multiple_from_foreign tb0 f ta in @@ -1337,7 +1317,7 @@ module MakeCustomHeterogeneous let tree1 = update_multiple_from_inter_with_foreign tb1 f ta1 in if tree0 == ta0 && tree1 == ta1 then ta else branch ~prefix:pa ~branching_bit:ma ~tree0 ~tree1 - else if ma > mb && match_prefix pb pa ma + else if unsigned_lt mb ma && match_prefix pb pa ma then if ma land pb == 0 then let ta0' = update_multiple_from_inter_with_foreign tb f ta0 in @@ -1347,7 +1327,7 @@ module MakeCustomHeterogeneous let ta1' = update_multiple_from_inter_with_foreign tb f ta1 in if ta1' == ta1 then ta else branch ~prefix:pa ~branching_bit:ma ~tree0:ta0 ~tree1:ta1' - else if ma < mb && match_prefix pa pb mb + else if unsigned_lt ma mb && match_prefix pa pb mb then if mb land pa == 0 then update_multiple_from_inter_with_foreign tb0 f ta else update_multiple_from_inter_with_foreign tb1 f ta @@ -1418,11 +1398,11 @@ module MakeHeterogeneousSet(Key:HETEROGENEOUS_KEY) : HETEROGENEOUS_SET let f: type a. a key -> unit -> 'acc -> 'acc = fun k () acc -> f.f k acc in BaseMap.fold { f } set acc - let min_elt t = let KeyValue(m, ()) = BaseMap.min_binding t in Any m - let max_elt t = let KeyValue(m, ()) = BaseMap.max_binding t in Any m + let unsigned_min_elt t = let KeyValue(m, ()) = BaseMap.unsigned_min_binding t in Any m + let unsigned_max_elt t = let KeyValue(m, ()) = BaseMap.unsigned_max_binding t in Any m - let pop_maximum t = Option.map (fun (KeyValue(m,()),t) -> Any m,t) (BaseMap.pop_maximum t) - let pop_minimum t = Option.map (fun (KeyValue(m,()),t) -> Any m,t) (BaseMap.pop_minimum t) + let pop_unsigned_maximum t = Option.map (fun (KeyValue(m,()),t) -> Any m,t) (BaseMap.pop_unsigned_maximum t) + let pop_unsigned_minimum t = Option.map (fun (KeyValue(m,()),t) -> Any m,t) (BaseMap.pop_unsigned_minimum t) type polypretty = { f: 'a. Format.formatter -> 'a key -> unit; } [@@unboxed] let pretty ?pp_sep f fmt s = BaseMap.pretty ?pp_sep { f = fun fmt k () -> f.f fmt k} fmt s @@ -1501,16 +1481,16 @@ module MakeCustom let update k f m = update k (fun v -> snd_opt (f (opt_snd v))) m let add k v m = add k (Snd v) m let split x m = let (l,m,r) = split x m in (l, opt_snd m, r) - let min_binding m = let KeyValue(key,Snd value) = BaseMap.min_binding m in key,value - let max_binding m = let KeyValue(key,Snd value) = BaseMap.max_binding m in key,value + let unsigned_min_binding m = let KeyValue(key,Snd value) = BaseMap.unsigned_min_binding m in key,value + let unsigned_max_binding m = let KeyValue(key,Snd value) = BaseMap.unsigned_max_binding m in key,value (* let singleton k v = BaseMap.singleton (PolyKey.K k) v *) - let pop_minimum m = - match BaseMap.pop_minimum m with + let pop_unsigned_minimum m = + match BaseMap.pop_unsigned_minimum m with | None -> None | Some(KeyValue(key,Snd value),m) -> Some(key,value,m) - let pop_maximum m = - match BaseMap.pop_maximum m with + let pop_unsigned_maximum m = + match BaseMap.pop_unsigned_maximum m with | None -> None | Some(KeyValue(key,Snd value),m) -> Some(key,value,m) @@ -1605,10 +1585,10 @@ module MakeSet(Key: KEY) : SET with type elt = Key.t = struct | None -> None | Some(KeyValue(k,())) -> Some k - let min_elt t = let Any x = min_elt t in x - let max_elt t = let Any x = max_elt t in x - let pop_minimum t = Option.map (fun (Any x, t) -> (x,t)) (pop_minimum t) - let pop_maximum t = Option.map (fun (Any x, t) -> (x,t)) (pop_maximum t) + let unsigned_min_elt t = let Any x = unsigned_min_elt t in x + let unsigned_max_elt t = let Any x = unsigned_max_elt t in x + let pop_unsigned_minimum t = Option.map (fun (Any x, t) -> (x,t)) (pop_unsigned_minimum t) + let pop_unsigned_maximum t = Option.map (fun (Any x, t) -> (x,t)) (pop_unsigned_maximum t) let to_seq m = Seq.map (fun (BaseMap.KeyValue(elt,())) -> elt) (BaseMap.to_seq m) let to_rev_seq m = Seq.map (fun (BaseMap.KeyValue(elt,())) -> elt) (BaseMap.to_rev_seq m) diff --git a/patriciaTree.mli b/patriciaTree.mli index 454d866..2199992 100644 --- a/patriciaTree.mli +++ b/patriciaTree.mli @@ -31,8 +31,9 @@ each key to be mapped to a unique integer identifier. - The implementation uses Patricia Tree, as described in Oksasaki - and Gill's 1998 paper "Fast mergeable integer maps", i.e. it is a - space-efficient prefix trie over the big-endian representation of + and Gill's 1998 paper + {{: https://www.semanticscholar.org/paper/Fast-Mergeable-Integer-Maps-Okasaki-Gill/23003be706e5f586f23dd7fa5b2a410cc91b659d}{i Fast mergeable integer maps}}, + i.e. it is a space-efficient prefix trie over the big-endian representation of the key's integer identifier. The main benefit of Patricia Tree is that their representation @@ -48,9 +49,12 @@ notably (key,value) pairs or different types to be in the same map, or to choose the memory representation of the nodes of the tree. - - Some operations like [pop_minimum] and [pop_maximum] make our Set + - Some operations like {{!BASE_MAP.pop_unsigned_minimum}[pop_unsigned_minimum]} and + {{!BASE_MAP.pop_unsigned_maximum}[pop_unsigned_maximum]} make our Set suitable as priority queue (but remember that each element in the - queue must map to a distinct integer). *) + queue must map to a distinct integer, and that using the {{!unsigned_lt}unsigned order} + means elements with negative priority are seen as greater than elements with + positive ones). *) (** Note on complexity: in the following, n represents the size of the map when there is one (and [|map1|] is the number of elements in @@ -66,6 +70,40 @@ type intkey type mask +val unsigned_lt : int -> int -> bool +(** All integers comparisons in this library are done according to their + {b unsigned representation}. This is the same as signed comparison for same + sign integers, but all negative integers are greater than the positives. + This means [-1] is the greatest possible number, and [0] is the smallest. + {[ + # unsigned_lt 2 (-1);; + - bool : true + # unsigned_lt max_int min_int;; + - bool : true + # unsigned_lt 3 2;; + - bool : false + # unsigned_lt 2 3;; + - bool : true + # unsigned_lt (-2) (-3);; + - bool : false + # unsigned_lt (-4) (-3);; + - bool : true + # unsigned_lt 0 0;; + - bool : false + ]} + + @since 0.10.0 *) + +(**/**) + +val highest_bit : int -> (int[@untagged]) +(** [highest_bit x] is an integer with a single bit set: the highest set bit of [x]. + exported for test purposes only. + + @since 0.10.0 *) + +(**/**) + (** {1 Nodes} *) (** This module explains how a node is stored in memory, with @@ -166,13 +204,18 @@ module type BASE_MAP = sig (** {3 Basic functions} *) - val min_binding : 'a t -> 'a key_value_pair - (** @raises Not_found if the map is empty *) + val unsigned_min_binding : 'a t -> 'a key_value_pair + (** [unsigned_min_binding m] is minimal binding [KeyValue(k,v)] of the map, + using the {{!unsigned_lt}unsigned order} on [Key.to_int]. + @raises Not_found if the map is empty *) - val max_binding : 'a t -> 'a key_value_pair - (** @raises Not_found if the map is empty *) + val unsigned_max_binding : 'a t -> 'a key_value_pair + (** [unsigned_max_binding m] is maximal binding [KeyValue(k,v)] of the map, + using the {{!unsigned_lt}unsigned order} on [Key.to_int]. + @raises Not_found if the map is empty *) val singleton : 'a key -> ('a, 'b) value -> 'b t + (** Create a map with a single binding. *) val cardinal : 'a t -> int (** The size of the map, O(n) complexity *) @@ -182,10 +225,11 @@ module type BASE_MAP = sig [m] contains a unique binding [k->v]. *) val find : 'key key -> 'map t -> ('key, 'map) value - (** @raises Not_found if key is absent from map *) + (** [find key map] returns the value associated with [key] in [map] if present. + @raises Not_found if [key] is absent from map *) val find_opt : 'key key -> 'map t -> ('key, 'map) value option - (** Same as [find], but returns [None] for Not_found *) + (** Same as {!find}, but returns [None] for Not_found *) val mem : 'key key -> 'map t -> bool (** [mem key map] returns [true] iff [key] is bound in [map], O(log(n)) complexity. *) @@ -194,13 +238,17 @@ module type BASE_MAP = sig (** Returns a map with the element removed, O(log(n)) complexity. Returns a physically equal map if the element is absent. *) - val pop_minimum: 'map t -> ('map key_value_pair * 'map t) option - (** [pop_minimum m] returns [None] if [is_empty m], or [Some(key,value,m')] where - [(key,value) = min_binding m] and [m' = remove m key]. O(log(n)) complexity. *) + val pop_unsigned_minimum: 'map t -> ('map key_value_pair * 'map t) option + (** [pop_unsigned_minimum m] returns [None] if [is_empty m], or [Some(key,value,m')] where + [(key,value) = unsigned_min_binding m] and [m' = remove m key]. + Uses the {{!unsigned_lt}unsigned order} on [Key.to_int]. + O(log(n)) complexity. *) - val pop_maximum: 'map t -> ('map key_value_pair * 'map t) option - (** [pop_maximum m] returns [None] if [is_empty m], or [Some(key,value,m')] where - [(key,value) = max_binding m] and [m' = remove m key]. O(log(n)) complexity. *) + val pop_unsigned_maximum: 'map t -> ('map key_value_pair * 'map t) option + (** [pop_unsigned_maximum m] returns [None] if [is_empty m], or [Some(key,value,m')] where + [(key,value) = unsigned_max_binding m] and [m' = remove m key]. + Uses the {{!unsigned_lt}unsigned order} on [Key.to_int]. + O(log(n)) complexity. *) val insert: 'a key -> (('a,'map) value option -> ('a,'map) value) -> 'map t -> 'map t (** [insert key f map] modifies or insert an element of the map; [f] @@ -230,23 +278,24 @@ module type BASE_MAP = sig - submap of [map] whose keys are smaller than [key] - value associated to [key] (if present) - submap of [map] whose keys are bigger than [key] - Where the order is given by [Key.to_int]. *) + Where the order is given by the {{!unsigned_lt}unsigned order} on [Key.to_int]. *) type 'map polyiter = { f : 'a. 'a key -> ('a, 'map) value -> unit; } [@@unboxed] val iter : 'map polyiter -> 'map t -> unit - (** [iter f m] calls [f.f] on all bindings of [m], in the order given by [Key.to_int] *) + (** [iter f m] calls [f.f] on all bindings of [m], + in the {{!unsigned_lt}unsigned order} on [Key.to_int] *) type ('acc,'map) polyfold = { f: 'a. 'a key -> ('a,'map) value -> 'acc -> 'acc } [@@unboxed] val fold : ('acc,'map) polyfold -> 'map t -> 'acc -> 'acc (** [fold f m acc] returns [f.f key_n value_n (... (f.f key_1 value_1 acc))] where [(key_1, value_1) ... (key_n, value_n)] are the bindings of [m], in - the order given by [Key.to_int]. *) + the {{!unsigned_lt}unsigned order} on [Key.to_int]. *) type 'map polypredicate = { f: 'a. 'a key -> ('a,'map) value -> bool; } [@@unboxed] val filter : 'map polypredicate -> 'map t -> 'map t (** [filter f m] returns the submap of [m] containing the bindings [k->v] such that [f.f k v = true]. - [f.f] is called in the order given by [Key.to_int] *) + [f.f] is called in the {{!unsigned_lt}unsigned order} of [Key.to_int] *) val for_all : 'map polypredicate -> 'map t -> bool (** [for_all f m] checks that [f] holds on all bindings of [m]7 @@ -262,14 +311,14 @@ module type BASE_MAP = sig val map : ('map,'map) polymap -> 'map t -> 'map t val map_no_share : ('map1,'map2) polymap -> 'map1 t -> 'map2 t (** [map f m] and [map_no_share f m] replace all bindings [(k,v)] by [(k, f.f v)]. - Bindings are examined in the order given by [Key.to_int]. *) + Bindings are examined in the {{!unsigned_lt}unsigned order} of [Key.to_int]. *) type ('map1,'map2) polymapi = { f : 'a. 'a key -> ('a, 'map1) value -> ('a, 'map2) value; } [@@unboxed] val mapi : ('map,'map) polymapi -> 'map t -> 'map t val mapi_no_share : ('map1,'map2) polymapi -> 'map1 t -> 'map2 t (** [mapi f m] and [mapi_no_share f m] replace all bindings [(k,v)] by [(k, f.f k v)]. - Bindings are examined in the order given by [Key.to_int]. *) + Bindings are examined in the {{!unsigned_lt}unsigned order} of [Key.to_int]. *) type ('map1,'map2) polyfilter_map = { f : 'a. 'a key -> ('a, 'map1) value -> ('a, 'map2) value option; } [@@unboxed] @@ -278,7 +327,7 @@ module type BASE_MAP = sig (** [filter_map m f] and [filter_map_no_share m f] remove the bindings [(k,v)] for which [f.f k v] is [None], and replaces the bindings [(k,v)] for which [f.f k v] is [Some v'] by [(k,v')]. - Bindings are examined in the order given by [Key.to_int]. *) + Bindings are examined in the {{!unsigned_lt}unsigned order} of [Key.to_int]. *) type 'map polypretty = { f: 'a. Format.formatter -> 'a key -> ('a, 'map) value -> unit } [@@unboxed] val pretty : @@ -287,7 +336,7 @@ module type BASE_MAP = sig (** Pretty-prints a map using the given formatter. [pp_sep] is called once between each binding, it defaults to [Format.pp_print_cut]. - Bindings are printed in the order given by [Key.to_int] *) + Bindings are printed in the {{!unsigned_lt}unsigned order} of [Key.to_int] *) (** {3 Functions on pairs of maps} *) @@ -300,7 +349,7 @@ module type BASE_MAP = sig - [m1] and [m2] have the same domain (set of keys) - for all bindings [(k, v1)] in [m1] and [(k, v2)] in [m2], [f.f k v1 v2] holds @assumes [f.f] is reflexive, i.e. [f.f k v v = true] to skip calls to equal subtrees. - Calls [f.f] in ascending order of [Key.to_int]. + Calls [f.f] in ascending {{!unsigned_lt}unsigned order} of [Key.to_int]. Exits early if the domains mismatch. It is useful to implement equality on maps: @@ -315,7 +364,7 @@ module type BASE_MAP = sig ('map1,'map2) polysame_domain_for_all2 -> 'map1 t -> 'map2 t -> bool (** [nonreflexive_same_domain_for_all2 f m1 m2] is the same as {!reflexive_same_domain_for_all2}, but doesn't assume [f.f] is reflexive. - It thus calls [f.f] on every binding, in ascending order of [Key.to_int]. + It thus calls [f.f] on every binding, in ascending {{!unsigned_lt}unsigned order} of [Key.to_int]. Exits early if the domains mismatch. *) val reflexive_subset_domain_for_all2 : @@ -324,7 +373,7 @@ module type BASE_MAP = sig - [m1]'s domain is a subset of [m2]'s. (all keys defined in [m1] are also defined in [m2]) - for all bindings [(k, v1)] in [m1] and [(k, v2)] in [m2], [f.f k v1 v2] holds @assumes [f.f] is reflexive, i.e. [f.f k v v = true] to skip calls to equal subtrees. - Calls [f.f] in ascending order of [Key.to_int]. + Calls [f.f] in ascending {{!unsigned_lt}unsigned order} of [Key.to_int]. Exits early if the domains mismatch. *) type ('map1, 'map2, 'map3) polyunion = { @@ -334,7 +383,7 @@ module type BASE_MAP = sig union of the keys of [map1] and [map2]. [f.f] is used to combine the values of keys mapped in both maps. @assumes [f.f] idempotent (i.e. [f key value value == value]) - [f.f] is called in the order given by [Key.to_int]. + [f.f] is called in the {{!unsigned_lt}unsigned order} of [Key.to_int]. [f.f] is never called on physically equal values. Preserves physical equality as much as possible. Complexity is O(log(n)*Delta) where Delta is the number of @@ -348,7 +397,7 @@ module type BASE_MAP = sig intersection of the keys of [map1] and [map2]. [f.f] is used to combine the values a key is mapped in both maps. @assumes [f.f] idempotent (i.e. [f key value value == value]) - [f.f] is called in the order given by [Key.to_int]. + [f.f] is called in the {{!unsigned_lt}unsigned order} of [Key.to_int]. [f.f] is never called on physically equal values. Preserves physical equality as much as possible. Complexity is O(log(n)*Delta) where Delta is the number of @@ -358,7 +407,7 @@ module type BASE_MAP = sig (** [nonidempotent_inter_no_share f map1 map2] is the same as {!idempotent_inter} but doesn't preverse physical equality, doesn't assume [f.f] is idempotent, and can change the type of values. [f.f] is called on every shared binding. - [f.f] is called in increasing order of keys. + [f.f] is called in increasing {{!unsigned_lt}unsigned order} of keys. O(n) complexity *) @@ -370,7 +419,7 @@ module type BASE_MAP = sig type ('map1, 'map2, 'map3) polymerge = { f : 'a. 'a key -> ('a, 'map1) value option -> ('a, 'map2) value option -> ('a, 'map3) value option; } [@@unboxed] val slow_merge : ('map1, 'map2, 'map3) polymerge -> 'map1 t -> 'map2 t -> 'map3 t - (** This is the same as {!Stdlib.Map.S.merge} *) + (** This is the same as {{: https://ocaml.org/manual/5.1/api/Map.S.html#VALmerge}Stdlib.Map.S.merge} *) val disjoint : 'a t -> 'a t -> bool (** [disjoint m1 m2] is [true] iff [m1] and [m2] have disjoint domains *) @@ -378,10 +427,10 @@ module type BASE_MAP = sig (** {3 Conversion functions} *) val to_seq : 'a t -> 'a key_value_pair Seq.t - (** [to_seq m] iterates the whole map, in increasing order of [Key.to_int] *) + (** [to_seq m] iterates the whole map, in increasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) val to_rev_seq : 'a t -> 'a key_value_pair Seq.t - (** [to_rev_seq m] iterates the whole map, in decreasing order of [Key.to_int] *) + (** [to_rev_seq m] iterates the whole map, in decreasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) val add_seq : 'a key_value_pair Seq.t -> 'a t -> 'a t (** [add_seq s m] adds all bindings of the sequence [s] to [m] in order. *) @@ -395,7 +444,7 @@ module type BASE_MAP = sig If a key is bound multiple times in [l], the latest binding is kept *) val to_list : 'a t -> 'a key_value_pair list - (** [to_list m] returns the bindings of [m] as a list, in increasing order of [Key.to_int] *) + (** [to_list m] returns the bindings of [m] as a list, in increasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) end (** {2 Heterogeneous maps and sets} *) @@ -418,7 +467,8 @@ module type HETEROGENEOUS_MAP = sig include BASE_MAP - (** Operation with maps/set of different types *) + (** Operation with maps/set of different types. + [Map2] must use the same [Key.to_int] function. *) module WithForeign(Map2:BASE_MAP with type 'a key = 'a key):sig type ('map1,'map2) polyinter_foreign = { f: 'a. 'a key -> ('a,'map1) value -> ('a,'map2) Map2.value -> ('a,'map1) value } [@@unboxed] @@ -439,7 +489,7 @@ module type HETEROGENEOUS_MAP = sig i.e. [update_multiple_from_foreign m_from f m_to] calls [f.f] on every key of [m_from], says if the corresponding value also exists in [m_to], and adds or remove the element in [m_to] depending on the value of [f.f]. - [f.f] is called in the order of [Key.to_int]. + [f.f] is called in the {{!unsigned_lt}unsigned order} of [Key.to_int]. O(size(m_from) + size(m_to)) complexity. *) type ('map1,'map2) polyupdate_multiple_inter = { f: 'a. 'a key -> ('a,'map1) value -> ('a,'map2) Map2.value -> ('a,'map1) value option } [@@unboxed] @@ -509,21 +559,25 @@ module type HETEROGENEOUS_SET = sig (** [remove elt set] returns a set containing all elements of [set] except [elt]. Returns a value physically equal to [set] if [elt] is not present. *) - val min_elt: t -> any_elt - (** The minimal element if non empty. + val unsigned_min_elt: t -> any_elt + (** The minimal element if non empty, according to the + {{!unsigned_lt}unsigned order} on elements. @raises Not_found *) - val max_elt: t -> any_elt - (** The maximal element if non empty. + val unsigned_max_elt: t -> any_elt + (** The maximal element if non empty, according to the + {{!unsigned_lt}unsigned order} on elements. @raises Not_found *) - val pop_minimum: t -> (any_elt * t) option - (** [pop_minimum s] is [Some (elt, s')] where [elt = min_elt s] and [s' = remove elt s] - if [s] is non empty. *) + val pop_unsigned_minimum: t -> (any_elt * t) option + (** [pop_unsigned_minimum s] is [Some (elt, s')] where [elt = unsigned_min_elt s] and [s' = remove elt s] + if [s] is non empty. + Uses the {{!unsigned_lt}unsigned order} on elements. *) - val pop_maximum: t -> (any_elt * t) option - (** [pop_maximum s] is [Some (elt, s')] where [elt = max_elt s] and [s' = remove elt s] - if [s] is non empty. *) + val pop_unsigned_maximum: t -> (any_elt * t) option + (** [pop_unsigned_maximum s] is [Some (elt, s')] where [elt = unsigned_max_elt s] and [s' = remove elt s] + if [s] is non empty. + Uses the {{!unsigned_lt}unsigned order} on elements. *) (** {3 Functions on pairs of sets} *) @@ -547,27 +601,28 @@ module type HETEROGENEOUS_SET = sig val split: 'a elt -> t -> t * bool * t (** [split elt set] returns [s_lt, present, s_gt] where [s_lt] contains all elements of [set] smaller than [elt], [s_gt] - all those greater than [elt], and [present] is [true] if [elt] is in [set]. *) + all those greater than [elt], and [present] is [true] if [elt] is in [set]. + Uses the {{!unsigned_lt}unsigned order} on elements. *) (** {3 Iterators} *) type polyiter = { f: 'a. 'a elt -> unit; } [@@unboxed] val iter: polyiter -> t -> unit - (** [iter f set] calls [f.f] on all elements of [set], in order of [Key.to_int]. *) + (** [iter f set] calls [f.f] on all elements of [set], in the {{!unsigned_lt}unsigned order} of [Key.to_int]. *) type polypredicate = { f: 'a. 'a elt -> bool; } [@@unboxed] val filter: polypredicate -> t -> t (** [filter f set] is the subset of [set] that only contains the elements that - satisfy [f.f]. [f.f] is called in order of [Key.to_int]. *) + satisfy [f.f]. [f.f] is called in the {{!unsigned_lt}unsigned order} of [Key.to_int]. *) val for_all: polypredicate -> t -> bool (** [for_all f set] is [true] if [f.f] is [true] on all elements of [set]. - Short-circuits on first [false]. [f.f] is called in order of [Key.to_int]. *) + Short-circuits on first [false]. [f.f] is called in the {{!unsigned_lt}unsigned order} of [Key.to_int]. *) type 'acc polyfold = { f: 'a. 'a elt -> 'acc -> 'acc } [@@unboxed] val fold: 'acc polyfold -> t -> 'acc -> 'acc (** [fold f set acc] returns [f.f elt_n (... (f.f elt_1 acc) ...)], where - [elt_1, ..., elt_n] are the elements of [set], in increasing order of + [elt_1, ..., elt_n] are the elements of [set], in increasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) type polypretty = { f: 'a. Format.formatter -> 'a elt -> unit; } [@@unboxed] @@ -579,10 +634,10 @@ module type HETEROGENEOUS_SET = sig (** {3 Conversion functions} *) val to_seq : t -> any_elt Seq.t - (** [to_seq st] iterates the whole set, in increasing order of [Key.to_int] *) + (** [to_seq st] iterates the whole set, in increasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) val to_rev_seq : t -> any_elt Seq.t - (** [to_rev_seq st] iterates the whole set, in decreasing order of [Key.to_int] *) + (** [to_rev_seq st] iterates the whole set, in decreasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) val add_seq : any_elt Seq.t -> t -> t (** [add_seq s st] adds all elements of the sequence [s] to [st] in order. *) @@ -594,7 +649,7 @@ module type HETEROGENEOUS_SET = sig (** [of_list l] creates a new set from the elements of [l]. *) val to_list : t -> any_elt list - (** [to_list s] returns the elements of [s] as a list, in increasing order of [Key.to_int] *) + (** [to_list s] returns the elements of [s] as a list, in increasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) end @@ -647,44 +702,47 @@ module type SET = sig (** [remove elt set] returns a set containing all elements of [set] except [elt]. Returns a value physically equal to [set] if [elt] is not present. *) - val min_elt: t -> elt - (** The minimal element if non empty. + val unsigned_min_elt: t -> elt + (** The minimal element (according to the {{!unsigned_lt}unsigned order} on [Key.to_int]) if non empty. @raises Not_found *) - val max_elt: t -> elt - (** The maximal element if non empty. + val unsigned_max_elt: t -> elt + (** The maximal element (according to the {{!unsigned_lt}unsigned order} on [Key.to_int]) if non empty. @raises Not_found *) - val pop_minimum: t -> (elt * t) option - (** [pop_minimum s] is [Some (elt, s')] where [elt = min_elt s] and [s' = remove elt s] - if [s] is non empty. *) + val pop_unsigned_minimum: t -> (elt * t) option + (** [pop_unsigned_minimum s] is [Some (elt, s')] where [elt = unsigned_min_elt s] and [s' = remove elt s] + if [s] is non empty. + Uses the {{!unsigned_lt}unsigned order} on [Key.to_int]. *) - val pop_maximum: t -> (elt * t) option - (** [pop_maximum s] is [Some (elt, s')] where [elt = max_elt s] and [s' = remove elt s] - if [s] is non empty. *) + val pop_unsigned_maximum: t -> (elt * t) option + (** [pop_unsigned_maximum s] is [Some (elt, s')] where [elt = unsigned_max_elt s] and [s' = remove elt s] + if [s] is non empty. + Uses the {{!unsigned_lt}unsigned order} on [Key.to_int]. *) (** {3 Iterators} *) val iter: (elt -> unit) -> t -> unit - (** [iter f set] calls [f] on all elements of [set], in order of [Key.to_int]. *) + (** [iter f set] calls [f] on all elements of [set], in the {{!unsigned_lt}unsigned order} of [Key.to_int]. *) val filter: (elt -> bool) -> t -> t (** [filter f set] is the subset of [set] that only contains the elements that - satisfy [f]. [f] is called in order of [Key.to_int]. *) + satisfy [f]. [f] is called in the {{!unsigned_lt}unsigned order} of [Key.to_int]. *) val for_all: (elt -> bool) -> t -> bool (** [for_all f set] is [true] if [f] is [true] on all elements of [set]. - Short-circuits on first [false]. [f] is called in order of [Key.to_int]. *) + Short-circuits on first [false]. [f] is called in the {{!unsigned_lt}unsigned order} of [Key.to_int]. *) val fold: (elt -> 'acc -> 'acc) -> t -> 'acc -> 'acc (** [fold f set acc] returns [f elt_n (... (f elt_1 acc) ...)], where - [elt_1, ..., elt_n] are the elements of [set], in increasing order of + [elt_1, ..., elt_n] are the elements of [set], in increasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) val split: elt -> t -> t * bool * t (** [split elt set] returns [s_lt, present, s_gt] where [s_lt] contains all elements of [set] smaller than [elt], [s_gt] - all those greater than [elt], and [present] is [true] if [elt] is in [set]. *) + all those greater than [elt], and [present] is [true] if [elt] is in [set]. + Uses the {{!unsigned_lt}unsigned order} on [Key.to_int].*) val pretty : ?pp_sep:(Format.formatter -> unit -> unit) -> @@ -714,10 +772,10 @@ module type SET = sig (** {3 Conversion functions} *) val to_seq : t -> elt Seq.t - (** [to_seq st] iterates the whole set, in increasing order of [Key.to_int] *) + (** [to_seq st] iterates the whole set, in increasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) val to_rev_seq : t -> elt Seq.t - (** [to_rev_seq st] iterates the whole set, in decreasing order of [Key.to_int] *) + (** [to_rev_seq st] iterates the whole set, in decreasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) val add_seq : elt Seq.t -> t -> t (** [add_seq s st] adds all elements of the sequence [s] to [st] in order. *) @@ -729,7 +787,7 @@ module type SET = sig (** [of_list l] creates a new set from the elements of [l]. *) val to_list : t -> elt list - (** [to_list s] returns the elements of [s] as a list, in increasing order of [Key.to_int] *) + (** [to_list s] returns the elements of [s] as a list, in increasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) end (** The typechecker struggles with forall quantification on values if they @@ -763,13 +821,14 @@ module type MAP = sig val is_empty : 'a t -> bool (** Test if a map is empty; O(1) complexity. *) - val min_binding : 'a t -> (key * 'a) - (** Returns the (key,value) where [Key.to_int key] is minimal (in - unsigned representation of integers); O(log n) complexity. + val unsigned_min_binding : 'a t -> (key * 'a) + (** Returns the (key,value) where [Key.to_int key] is minimal (in the + {{!unsigned_lt}unsigned representation} of integers); O(log n) complexity. @raises Not_found if the map is empty *) - val max_binding : 'a t -> (key * 'a) - (** Returns the (key,value) where [Key.to_int key] is maximal; O(log n) complexity. + val unsigned_max_binding : 'a t -> (key * 'a) + (** Returns the (key,value) where [Key.to_int key] is maximal (in the + {{!unsigned_lt}unsigned representation} of integers); O(log n) complexity. @raises Not_found if the map is empty *) val singleton : key -> 'a -> 'a t @@ -794,13 +853,15 @@ module type MAP = sig (** Returns a map with the element removed, O(log(n)) complexity. Returns a physically equal map if the element is absent. *) - val pop_minimum : 'a t -> (key * 'a * 'a t) option - (** [pop_minimum m] returns [None] if [is_empty m], or [Some(key,value,m')] where - [(key,value) = min_binding m] and [m' = remove m key]. O(log(n)) complexity. *) + val pop_unsigned_minimum : 'a t -> (key * 'a * 'a t) option + (** [pop_unsigned_minimum m] returns [None] if [is_empty m], or [Some(key,value,m')] where + [(key,value) = unsigned_min_binding m] and [m' = remove m key]. O(log(n)) complexity. + Uses the {{!unsigned_lt}unsigned order} on [Key.to_int]. *) - val pop_maximum : 'a t -> (key * 'a * 'a t) option - (** [pop_maximum m] returns [None] if [is_empty m], or [Some(key,value,m')] where - [(key,value) = max_binding m] and [m' = remove m key]. O(log(n)) complexity. *) + val pop_unsigned_maximum : 'a t -> (key * 'a * 'a t) option + (** [pop_unsigned_maximum m] returns [None] if [is_empty m], or [Some(key,value,m')] where + [(key,value) = unsigned_max_binding m] and [m' = remove m key]. O(log(n)) complexity. + Uses the {{!unsigned_lt}unsigned order} on [Key.to_int]. *) val insert : key -> ('a option -> 'a) -> 'a t -> 'a t (** [insert key f map] modifies or insert an element of the map; [f] @@ -830,20 +891,21 @@ module type MAP = sig - submap of [map] whose keys are smaller than [key] - value associated to [key] (if present) - submap of [map] whose keys are bigger than [key] - Where the order is given by [Key.to_int]. *) + Using the {{!unsigned_lt}unsigned order} is given by [Key.to_int]. *) val iter : (key -> 'a -> unit) -> 'a t -> unit - (** Iterate on each (key,value) pair of the map, in increasing order of keys. *) + (** Iterate on each (key,value) pair of the map, in increasing {{!unsigned_lt}unsigned order} of keys. *) val fold : (key -> 'a -> 'acc -> 'acc) -> 'a t -> 'acc -> 'acc - (** Fold on each (key,value) pair of the map, in increasing order of keys. *) + (** Fold on each (key,value) pair of the map, in increasing {{!unsigned_lt}unsigned order} of keys. *) val filter : (key -> 'a -> bool) -> 'a t -> 'a t (** Returns the submap containing only the key->value pairs satisfying the - given predicate. [f] is called in increasing number of keys *) + given predicate. [f] is called in increasing {{!unsigned_lt}unsigned order} of keys. *) val for_all : (key -> 'a -> bool) -> 'a t -> bool - (** Returns true if the predicate holds on all map bindings. Short-circuiting *) + (** Returns true if the predicate holds on all map bindings. Short-circuiting. + [f] is called in increasing {{!unsigned_lt}unsigned order} of keys. *) (** In the following, the *no_share function allows taking arguments of different types (but cannot share subtrees of the map), while @@ -857,12 +919,12 @@ module type MAP = sig value is physically the same (i.e. [f key value == value] for all the keys in the subtree) are guaranteed to be physically equal to the original subtree. O(n) complexity. - [f] is called in increasing order of keys. *) + [f] is called in increasing {{!unsigned_lt}unsigned order} of keys. *) val map_no_share : ('a -> 'b) -> 'a t -> 'b t (** [map_no_share f m] returns a map where the [value] bound to each [key] is replaced by [f value]. O(n) complexity. - [f] is called in increasing order of keys. *) + [f] is called in increasing {{!unsigned_lt}unsigned order} of keys. *) val mapi : (key -> 'a -> 'a) -> 'a t -> 'a t (** [mapi f m] returns a map where the [value] bound to each [key] is @@ -870,12 +932,12 @@ module type MAP = sig value is physically the same (i.e. [f key value == value] for all the keys in the subtree) are guaranteed to be physically equal to the original subtree. O(n) complexity. - [f] is called in increasing order of keys. *) + [f] is called in increasing {{!unsigned_lt}unsigned order} of keys. *) val mapi_no_share : (key -> 'a -> 'b) -> 'a t -> 'b t (** [mapi_no_share f m] returns a map where the [value] bound to each [key] is replaced by [f key value]. O(n) complexity. - [f] is called in increasing order of keys. *) + [f] is called in increasing {{!unsigned_lt}unsigned order} of keys. *) val filter_map : (key -> 'a -> 'a option) -> 'a t -> 'a t (** [filter_map m f] returns a map where the [value] bound to each @@ -885,14 +947,14 @@ module type MAP = sig (i.e. [f key value = Some v] with [value == v] for all the keys in the subtree) are guaranteed to be physically equal to the original subtree. O(n) complexity. - [f] is called in increasing order of keys. *) + [f] is called in increasing {{!unsigned_lt}unsigned order} of keys. *) val filter_map_no_share : (key -> 'a -> 'b option) -> 'a t -> 'b t (** [filter_map m f] returns a map where the [value] bound to each [key] is removed (if [f key value] returns [None]), or is replaced by [v] ((if [f key value] returns [Some v]). O(n) complexity. - [f] is called in increasing order of keys. *) + [f] is called in increasing {{!unsigned_lt}unsigned order} of keys. *) (** {3 Operations on pairs of maps} *) @@ -947,7 +1009,7 @@ module type MAP = sig preserve physical equality of the subtreess in that case. The complexity is O(log(n)*Delta) where Delta is the number of different keys between [map1] and [map2]. - [f] is called in increasing order of keys. + [f] is called in increasing {{!unsigned_lt}unsigned order} of keys. [f] is never called on physically equal values. *) val idempotent_inter : (key -> 'a -> 'a -> 'a) -> 'a t -> 'a t -> 'a t @@ -959,7 +1021,7 @@ module type MAP = sig preserve physical equality of the subtrees in that case. The complexity is O(log(n)*Delta) where Delta is the number of different keys between [map1] and [map2]. - [f] is called in increasing order of keys. + [f] is called in increasing {{!unsigned_lt}unsigned order} of keys. [f] is never called on physically equal values. *) val nonidempotent_inter_no_share : (key -> 'a -> 'b -> 'c) -> 'a t -> 'b t -> 'c t @@ -969,12 +1031,12 @@ module type MAP = sig need to be idempotent, which imply that we have to visit physically equal subtrees of [map1] and [map2]. The complexity is O(log(n)*min(|map1|,|map2|)). - [f] is called in increasing order of keys. + [f] is called in increasing {{!unsigned_lt}unsigned order} of keys. [f] is called on every shared binding. *) val idempotent_inter_filter : (key -> 'a -> 'a -> 'a option) -> 'a t -> 'a t -> 'a t - (** [idempotent_inter_filter f m1 m2] is like [idempotent_inter f m1 - m2] (assuming idempotence, using and preserving physically + (** [idempotent_inter_filter f m1 m2] is like {!idempotent_inter} + (assuming idempotence, using and preserving physically equal subtrees), but it also removes the key->value bindings for which [f] returns [None]. *) @@ -987,9 +1049,8 @@ module type MAP = sig val disjoint : 'a t -> 'a t -> bool - (* Maybe: WithForeign and WithForeignHeterogeneous. *) - - (** Combination with other kinds of maps. *) + (** Combination with other kinds of maps. + [Map2] must use the same [Key.to_int] function. *) module WithForeign(Map2 : BASE_MAP with type _ key = key):sig type ('b,'c) polyfilter_map_foreign = { f: 'a. key -> ('a,'b) Map2.value -> 'c option } [@@unboxed] @@ -1011,7 +1072,7 @@ module type MAP = sig i.e. [update_multiple_from_foreign m_from f m_to] calls [f.f] on every key of [m_from], says if the corresponding value also exists in [m_to], and adds or remove the element in [m_to] depending on the value of [f.f]. - [f.f] is called in the order of [Key.to_int]. + [f.f] is called in the {{!unsigned_lt}unsigned order} of [Key.to_int]. O(size(m_from) + size(m_to)) complexity. *) @@ -1033,10 +1094,10 @@ module type MAP = sig (** {3 Conversion functions} *) val to_seq : 'a t -> (key * 'a) Seq.t - (** [to_seq m] iterates the whole map, in increasing order of [Key.to_int] *) + (** [to_seq m] iterates the whole map, in increasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) val to_rev_seq : 'a t -> (key * 'a) Seq.t - (** [to_rev_seq m] iterates the whole map, in decreasing order of [Key.to_int] *) + (** [to_rev_seq m] iterates the whole map, in decreasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) val add_seq : (key * 'a) Seq.t -> 'a t -> 'a t (** [add_seq s m] adds all bindings of the sequence [s] to [m] in order. *) @@ -1050,7 +1111,8 @@ module type MAP = sig If a key is bound multiple times in [l], the latest binding is kept *) val to_list : 'a t -> (key * 'a) list - (** [to_list m] returns the bindings of [m] as a list, in increasing order of [Key.to_int] *) + (** [to_list m] returns the bindings of [m] as a list, + in increasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) end @@ -1067,7 +1129,12 @@ module type KEY = sig object. Correctness of the operations requires that different values in a tree correspond to different integers. - Must be injective, return only positive values, and ideally fast *) + Must be injective, and ideally fast. + + Note that since Patricia Trees use {{!unsigned_lt}unsigned order}, negative + keys are seen as bigger than positive keys. + Be wary of this when using negative keys combined with functions like + {{!BASE_MAP.unsigned_max_binding}[unsigned_max_binding]} and {{!BASE_MAP.pop_unsigned_maximum}[pop_unsigned_maximum]}. *) val to_int: t -> int end @@ -1088,7 +1155,12 @@ module type HETEROGENEOUS_KEY = sig object. Correctness of the operations requires that different values in a tree correspond to different integers. - Must be injective, return only positive values, and ideally fast *) + Must be injective, and ideally fast. + + Note that since Patricia Trees use {{!unsigned_lt}unsigned order}, negative + keys are seen as bigger than positive keys. + Be wary of this when using negative keys combined with functions like + {{!BASE_MAP.unsigned_max_binding}[unsigned_max_binding]} and {{!BASE_MAP.pop_unsigned_maximum}[pop_unsigned_maximum]}. *) val polyeq : 'a t -> 'b t -> ('a, 'b) cmp (** Polymorphic equality function used to compare our keys. diff --git a/patriciaTreeTest.ml b/patriciaTreeTest.ml index 843bbdc..842bc3a 100644 --- a/patriciaTreeTest.ml +++ b/patriciaTreeTest.ml @@ -21,6 +21,36 @@ open PatriciaTree +let check_highest_bit x res = + (* Printf.printf "CHECK_HIGHEST_BIT: %x %x\n%!" x res; *) + if (x = 0) + then (res = 0) + else begin + x != 0 && + (* The result is a single bit set. *) + res land (res-1) == 0 && + (* The bit x is set. *) + x land res = res && + (* It is the highest bit. *) + x land (lnot res) land (lnot (res - 1)) = 0 + end + +let () = QCheck.Test.check_exn @@ + QCheck.Test.make ~count:1000 ~name:"highest_bit" QCheck.int (fun x -> + check_highest_bit x (highest_bit x)) + +let unsigned_lt_ref x y = + if x >= 0 && y >= 0 + then x < y + else if x >= 0 + then (* pos < neg *) true + else if y >= 0 then false + else x < y + +let () = QCheck.Test.check_exn @@ + QCheck.Test.make ~count:1000 ~name:"unsigned_lt" QCheck.(pair int int) (fun (x,y) -> + unsigned_lt x y = unsigned_lt_ref x y) + let%test_module "TestHeterogeneous" = (module struct module MyKey = struct @@ -145,12 +175,18 @@ end) (* let _m5 = inter (fun a b -> a) _m2 _m3;; *) (* let _m6 = inter (fun a b -> a) _m1 _m2;; *) +let unsigned_compare x y = + if unsigned_lt x y then -1 + else if x = y then 0 else 1 let%test_module _ = (module struct (* A model. *) module IntMap = struct - module M = Map.Make(Int) + module M = Map.Make(struct + type t = int + let compare = unsigned_compare + end) include M let subset_domain_for_all_2 m1 m2 f = let exception False in @@ -205,12 +241,12 @@ let%test_module _ = (module struct | None, _ | _, None -> None | Some a, Some b -> (f key a b)) m1 m2 - let pop_minimum m = + let pop_unsigned_minimum m = match M.min_binding m with | exception Not_found -> None | (key,value) -> Some(key,value,M.remove key m) - let pop_maximum m = + let pop_unsigned_maximum m = match M.max_binding m with | exception Not_found -> None | (key,value) -> Some(key,value,M.remove key m) @@ -252,10 +288,12 @@ let%test_module _ = (module struct let third = extend_map first alist3 in (second,third) + let number_gen = QCheck.int + let gen = QCheck.(triple - (small_list (pair small_nat small_nat)) - (small_list (pair small_nat small_nat)) - (small_list (pair small_nat small_nat)));; + (small_list (pair number_gen number_gen)) + (small_list (pair number_gen number_gen)) + (small_list (pair number_gen number_gen))) let model_from_gen x = let (m1,m2) = two_maps_from_three_lists x in @@ -280,22 +318,22 @@ let%test_module _ = (module struct module Foreign = MyMap.WithForeign(MyMap.BaseMap) - let test_pop_minimum = QCheck.Test.make ~count:1000 ~name:"pop_minimum" - QCheck.(small_list (pair small_nat small_nat)) (fun x -> + let test_pop_minimum = QCheck.Test.make ~count:1000 ~name:"pop_unsigned_minimum" + QCheck.(small_list (pair number_gen number_gen)) (fun x -> let m = extend_map MyMap.empty x in let model = intmap_of_mymap m in - match MyMap.pop_minimum m, IntMap.pop_minimum model with + match MyMap.pop_unsigned_minimum m, IntMap.pop_unsigned_minimum model with | None, Some _ | Some _, None -> false | None, None -> true | Some(key1,val1,m'), Some(key2,val2,model') -> key1 = key2 && val1 = val2 && IntMap.equal (=) (intmap_of_mymap m') model') let () = QCheck.Test.check_exn test_pop_minimum - let test_pop_maximum = QCheck.Test.make ~count:1000 ~name:"pop_maximum" - QCheck.(small_list (pair small_nat small_nat)) (fun x -> + let test_pop_maximum = QCheck.Test.make ~count:1000 ~name:"pop_unsigned_maximum" + QCheck.(small_list (pair number_gen number_gen)) (fun x -> let m = extend_map MyMap.empty x in let model = intmap_of_mymap m in - match MyMap.pop_maximum m, IntMap.pop_maximum model with + match MyMap.pop_unsigned_maximum m, IntMap.pop_unsigned_maximum model with | None, Some _ | Some _, None -> false | None, None -> true | Some(key1,val1,m'), Some(key2,val2,model') -> @@ -310,7 +348,7 @@ let%test_module _ = (module struct let () = match !seen with | None -> () | Some old_key_int -> - if old_key_int < key_int + if unsigned_compare old_key_int key_int < 0 then () else QCheck.Test.fail_reportf "Non increasing calls to f : key %d seen after %d" @@ -330,7 +368,7 @@ let%test_module _ = (module struct in f let test_map_filter = QCheck.Test.make ~count:1000 ~name:"map_filter" - QCheck.(small_list (pair small_nat small_nat)) (fun x -> + QCheck.(small_list (pair number_gen number_gen)) (fun x -> let m1 = extend_map MyMap.empty x in let model1 = intmap_of_mymap m1 in let chk_calls1 = check_increases () in @@ -490,6 +528,27 @@ let%test_module _ = (module struct (* Printf.printf "res is %b\n%!" @@ IntMap.equal (=) modelres myres; *) modelres == myres) let () = QCheck.Test.check_exn test_disjoint + + let%test "negative_keys" = + let map = MyMap.add 0 0 MyMap.empty in + let _pp_l fmt = Format.pp_print_list ~pp_sep:(fun fmt () -> Format.fprintf fmt "; ") + (fun fmt (k,l) -> Format.fprintf fmt "(%x, %x)" k l) fmt in + let map2 = MyMap.add min_int 5 map in + let map3 = MyMap.add max_int 8 map2 in + let map4 = MyMap.add 25 8 map2 in + let map5 = MyMap.idempotent_inter_filter (fun _ _ _ -> None) map3 map4 in + (* Format.printf "[%a]@." pp_l (MyMap.to_list map3); + Format.printf "[%a]@." pp_l (MyMap.to_list map4); + Format.printf "[%a]@." pp_l (MyMap.to_list map5); + (match MyMap.BaseMap.view map3 with + | Branch{prefix; branching_bit; _} -> Format.printf "%x : %x@." (Obj.magic prefix) (Obj.magic branching_bit) + | _ -> () + ); *) + MyMap.to_list map = [(0,0)] && + MyMap.to_list map2 = [(0,0); (min_int,5)] && + MyMap.to_list map3 = [(0,0); (max_int,8); (min_int,5)] && + MyMap.to_list map4 = [(0,0); (25,8); (min_int,5)] && + MyMap.to_list map5 = MyMap.to_list map2 end)