From b2086bff4b2df4b382626bd3c3e689f8ac1dcf60 Mon Sep 17 00:00:00 2001 From: Dorian Lesbre Date: Fri, 26 Apr 2024 14:00:53 +0200 Subject: [PATCH 01/14] Switch from Z.log2 to one coded in c --- dune | 5 +- int_builtins.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++ patriciaTree.ml | 36 ++--------- 3 files changed, 166 insertions(+), 33 deletions(-) create mode 100644 int_builtins.c diff --git a/dune b/dune index 17e5fd7..64b7b1a 100644 --- a/dune +++ b/dune @@ -23,7 +23,10 @@ (name PatriciaTree) (public_name patricia-tree) (libraries zarith) - (modules PatriciaTree)) + (modules PatriciaTree) + (foreign_stubs + (language c) + (names int_builtins))) (documentation (package patricia-tree)) diff --git a/int_builtins.c b/int_builtins.c new file mode 100644 index 0000000..b776163 --- /dev/null +++ b/int_builtins.c @@ -0,0 +1,158 @@ +/**************************************************************************/ +/* This file is part of the Codex semantics library. */ +/* */ +/* Copyright (C) 2013-2024 */ +/* CEA (Commissariat à l'énergie atomique et aux énergies */ +/* alternatives) */ +/* */ +/* you can redistribute it and/or modify it under the terms of the GNU */ +/* Lesser General Public License as published by the Free Software */ +/* Foundation, version 2.1. */ +/* */ +/* It is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ +/* GNU Lesser General Public License for more details. */ +/* */ +/* See the GNU Lesser General Public License version 2.1 */ +/* for more details (enclosed in the file LICENSE). */ +/* */ +/**************************************************************************/ + +#define CAML_NAME_SPACE +#include +#include +#include + +#ifdef _MSC_VER +#include +#endif + +__attribute__((__always_inline__)) static unsigned int clz(unsigned int v) { + /* Note: on a 64 bit platform, GCC's _builtin_clz will perform a 32 + bit operation (even if the argument has type int). We have to use + _builtin_clzll instead. */ +#if __GNUC__ +#ifdef ARCH_SIXTYFOUR + return __builtin_clzll(v); +#else + return __builtin_clz(v) +#endif +#endif +#ifdef _MSC_VER + int res = 0; +#ifdef ARCH_SIXTYFOUR + _BitScanReverse64(&res, v); +#else + _BitScanReverse(&res, v); +#endif + return res; +#endif +} + +/**************** Log2 (with rounding to the floor). ****************/ +__attribute__((__always_inline__)) static uintnat +inline_caml_int_builtin_log2(value i) { + /* log2(v) is normally 32-1-clz(v), but because of the tag we + must substract one more. */ + return (8 * sizeof(value) - 2 - clz(i)); +} + +CAMLprim uintnat caml_int_builtin_log2(value i) { + return inline_caml_int_builtin_log2(i); +} + +CAMLprim uintnat caml_int_builtin_log2_untagged(uintnat i) { + return (8 * sizeof(value) - 1 - clz(i)); +} + +CAMLprim value caml_int_builtin_log2_byte(value i) { + return Val_int(inline_caml_int_builtin_log2(i)); +} + +/**************** Highest bit ****************/ +CAMLprim uintnat caml_int_builtin_highest_bit(value i) { + return (1 << inline_caml_int_builtin_log2(i)); +} + +CAMLprim uintnat caml_int_builtin_highest_bit_untagged(uintnat i) { + return (1 << caml_int_builtin_log2_untagged(i)); +} + +CAMLprim value caml_int_builtin_highest_bit_byte(value i) { + return Val_int(1 << inline_caml_int_builtin_log2(i)); +} + +/**************** Find first set ****************/ + +__attribute__((__always_inline__)) static unsigned int ffs(unsigned int v) { + /* Note: on a 64 bit platform, GCC's _builtin_ffs will perform a 32 + bit operation (even if the argument has type int). We have to use + _builtin_ffsll instead. */ +#if __GNUC__ +#ifdef ARCH_SIXTYFOUR + return __builtin_ffsll(v); +#else + return __builtin_ffs(v) +#endif +#endif +#ifdef _MSC_VER +#error Not done. Maybe using BitScanReverse. +#endif +} + +CAMLprim uintnat caml_int_builtin_ffs_untagged(uintnat i) { return ffs(i); } + +CAMLprim value caml_int_builtin_ffs_byte(value i) { + return Val_int(ffs(Int_val(i))); +} + +/**************** Count trailing zeroes. ****************/ +__attribute__((__always_inline__)) static unsigned int ctz(unsigned int v) { +#if __GNUC__ +#ifdef ARCH_SIXTYFOUR + return __builtin_ctzll(v); +#else + return __builtin_ctz(v) +#endif +#endif +#ifdef _MSC_VER + int res = 0; +#ifdef ARCH_SIXTYFOUR + _BitScanForward64(&res, v); +#else + _BitScanForward(&res, v); +#endif + return res; +#endif +} + +CAMLprim uintnat caml_int_builtin_ctz_untagged(uintnat i) { return ctz(i); } + +CAMLprim value caml_int_builtin_ctz_byte(value i) { + return Val_int(ctz(Int_val(i))); +} + +/**************** Popcount. ****************/ +/**************** Count trailing zeroes. ****************/ +__attribute__((__always_inline__)) static unsigned int +popcount(unsigned int v) { +#if __GNUC__ +#ifdef ARCH_SIXTYFOUR + return __builtin_popcountll(v); +#else + return __builtin_popcount(v) +#endif +#endif +#ifdef _MSC_VER + TODO +#endif +} + +CAMLprim uintnat caml_int_builtin_popcount_untagged(uintnat i) { + return popcount(i); +} + +CAMLprim value caml_int_builtin_popcount_byte(value i) { + return Val_int(popcount(Int_val(i))); +} diff --git a/patriciaTree.ml b/patriciaTree.ml index 5fae0e5..c8e8cd9 100644 --- a/patriciaTree.ml +++ b/patriciaTree.ml @@ -368,38 +368,10 @@ end (** {1 Utility functions} *) -(** Optimized computation, but does not work for values too high. *) -(* let _highest_bit v = - (* compute highest bit. - First, set all bits with weight less than - the highest set bit *) - let v1 = v lsr 1 in - let v2 = v lsr 2 in - let v = v lor v1 in - let v = v lor v2 in - let v1 = v lsr 3 in - let v2 = v lsr 6 in - let v = v lor v1 in - let v = v lor v2 in - let v1 = v lsr 9 in - let v2 = v lsr 18 in - let v = v lor v1 in - let v = v lor v2 in - (* then get highest bit *) - (succ v) lsr 1 - -let lowest_bit x = - x land (-x) *) - -(* let rec _highest_bit x = - let m = lowest_bit x in - if x = m then - m - else - _highest_bit (x - m) *) - -let highest_bit x = - 1 lsl (Z.log2 @@ Z.of_int x) +(** Fast highest bit computation in c, using GCC's __builtin_clz + which compile to efficient instruction (bsr) when possible. *) +external highest_bit: int -> (int[@untagged]) = + "caml_int_builtin_highest_bit_byte" "caml_int_builtin_highest_bit" [@@noalloc] (** Note: in the original version, okasaki give the masks as arguments to optimize the computation of highest_bit. *) From bbdc8186b0b9e748ffed34a51afdb5a39e0b3f6b Mon Sep 17 00:00:00 2001 From: Dorian Lesbre Date: Fri, 26 Apr 2024 14:01:50 +0200 Subject: [PATCH 02/14] Remove zarith dependency --- dune | 3 +-- dune-project | 2 -- patricia-tree.opam | 1 - 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/dune b/dune index 64b7b1a..1285ad0 100644 --- a/dune +++ b/dune @@ -22,7 +22,6 @@ (library (name PatriciaTree) (public_name patricia-tree) - (libraries zarith) (modules PatriciaTree) (foreign_stubs (language c) @@ -39,5 +38,5 @@ (libraries qcheck-core)) (preprocess (pps ppx_inline_test)) - (libraries PatriciaTree zarith qcheck-core) + (libraries PatriciaTree qcheck-core) (modules PatriciaTreeTest)) diff --git a/dune-project b/dune-project index 91a1a26..621fb83 100644 --- a/dune-project +++ b/dune-project @@ -52,8 +52,6 @@ (depends (ocaml (>= 4.14)) - (zarith - (>= "1.13")) dune (qcheck-core (and diff --git a/patricia-tree.opam b/patricia-tree.opam index a5161d6..d47812a 100644 --- a/patricia-tree.opam +++ b/patricia-tree.opam @@ -15,7 +15,6 @@ bug-reports: "https://github.com/codex-semantics-library/patricia-tree/issues" depends: [ "ocaml" {>= "4.14"} - "zarith" {>= "1.13"} "dune" {>= "2.7"} "qcheck-core" {>= "0.21.2" & with-test} "ppx_inline_test" {>= "v0.16.0" & with-test} From 89d8ab3aad2106a437a6f9702c511302b4298bfe Mon Sep 17 00:00:00 2001 From: Dorian Lesbre Date: Fri, 26 Apr 2024 14:11:16 +0200 Subject: [PATCH 03/14] Trim int_builtins to minimal subsection --- int_builtins.c | 110 +++---------------------------------------------- 1 file changed, 5 insertions(+), 105 deletions(-) diff --git a/int_builtins.c b/int_builtins.c index b776163..5d97f32 100644 --- a/int_builtins.c +++ b/int_builtins.c @@ -28,7 +28,7 @@ #include #endif -__attribute__((__always_inline__)) static unsigned int clz(unsigned int v) { +__attribute__((__always_inline__)) static inline unsigned int clz(unsigned int v) { /* Note: on a 64 bit platform, GCC's _builtin_clz will perform a 32 bit operation (even if the argument has type int). We have to use _builtin_clzll instead. */ @@ -36,7 +36,7 @@ __attribute__((__always_inline__)) static unsigned int clz(unsigned int v) { #ifdef ARCH_SIXTYFOUR return __builtin_clzll(v); #else - return __builtin_clz(v) + return __builtin_clz(v); #endif #endif #ifdef _MSC_VER @@ -50,109 +50,9 @@ __attribute__((__always_inline__)) static unsigned int clz(unsigned int v) { #endif } -/**************** Log2 (with rounding to the floor). ****************/ -__attribute__((__always_inline__)) static uintnat -inline_caml_int_builtin_log2(value i) { - /* log2(v) is normally 32-1-clz(v), but because of the tag we - must substract one more. */ - return (8 * sizeof(value) - 2 - clz(i)); -} - -CAMLprim uintnat caml_int_builtin_log2(value i) { - return inline_caml_int_builtin_log2(i); -} - -CAMLprim uintnat caml_int_builtin_log2_untagged(uintnat i) { - return (8 * sizeof(value) - 1 - clz(i)); -} - -CAMLprim value caml_int_builtin_log2_byte(value i) { - return Val_int(inline_caml_int_builtin_log2(i)); -} - /**************** Highest bit ****************/ CAMLprim uintnat caml_int_builtin_highest_bit(value i) { - return (1 << inline_caml_int_builtin_log2(i)); -} - -CAMLprim uintnat caml_int_builtin_highest_bit_untagged(uintnat i) { - return (1 << caml_int_builtin_log2_untagged(i)); -} - -CAMLprim value caml_int_builtin_highest_bit_byte(value i) { - return Val_int(1 << inline_caml_int_builtin_log2(i)); -} - -/**************** Find first set ****************/ - -__attribute__((__always_inline__)) static unsigned int ffs(unsigned int v) { - /* Note: on a 64 bit platform, GCC's _builtin_ffs will perform a 32 - bit operation (even if the argument has type int). We have to use - _builtin_ffsll instead. */ -#if __GNUC__ -#ifdef ARCH_SIXTYFOUR - return __builtin_ffsll(v); -#else - return __builtin_ffs(v) -#endif -#endif -#ifdef _MSC_VER -#error Not done. Maybe using BitScanReverse. -#endif -} - -CAMLprim uintnat caml_int_builtin_ffs_untagged(uintnat i) { return ffs(i); } - -CAMLprim value caml_int_builtin_ffs_byte(value i) { - return Val_int(ffs(Int_val(i))); -} - -/**************** Count trailing zeroes. ****************/ -__attribute__((__always_inline__)) static unsigned int ctz(unsigned int v) { -#if __GNUC__ -#ifdef ARCH_SIXTYFOUR - return __builtin_ctzll(v); -#else - return __builtin_ctz(v) -#endif -#endif -#ifdef _MSC_VER - int res = 0; -#ifdef ARCH_SIXTYFOUR - _BitScanForward64(&res, v); -#else - _BitScanForward(&res, v); -#endif - return res; -#endif -} - -CAMLprim uintnat caml_int_builtin_ctz_untagged(uintnat i) { return ctz(i); } - -CAMLprim value caml_int_builtin_ctz_byte(value i) { - return Val_int(ctz(Int_val(i))); -} - -/**************** Popcount. ****************/ -/**************** Count trailing zeroes. ****************/ -__attribute__((__always_inline__)) static unsigned int -popcount(unsigned int v) { -#if __GNUC__ -#ifdef ARCH_SIXTYFOUR - return __builtin_popcountll(v); -#else - return __builtin_popcount(v) -#endif -#endif -#ifdef _MSC_VER - TODO -#endif -} - -CAMLprim uintnat caml_int_builtin_popcount_untagged(uintnat i) { - return popcount(i); -} - -CAMLprim value caml_int_builtin_popcount_byte(value i) { - return Val_int(popcount(Int_val(i))); + /* log2(v) is normally 32-1-clz(v), but because of the tag we + must substract one more. */ + return (1 << (8 * sizeof(value) - 2 - clz(i))); } From 581212f55635968e1f2fe2dd8876dd624b098dc3 Mon Sep 17 00:00:00 2001 From: Dorian Lesbre Date: Fri, 26 Apr 2024 14:12:55 +0200 Subject: [PATCH 04/14] Changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index eea5ebc..134534f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # Unreleased - Fixed a bug where NodeWithId wasn't incrementing ids properly +- `zarith` is no longer a dependency, used GCC's `__builtin_clz` as a faster + method of finding an integer's highest bit # v0.9.0 - 2024-04-18 From 83b5c23dd4732931a8e4841e10a370588a64136d Mon Sep 17 00:00:00 2001 From: Dorian Lesbre Date: Mon, 29 Apr 2024 10:17:00 +0200 Subject: [PATCH 05/14] Fix types in int_builtins --- int_builtins.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/int_builtins.c b/int_builtins.c index 5d97f32..781725c 100644 --- a/int_builtins.c +++ b/int_builtins.c @@ -20,39 +20,42 @@ /**************************************************************************/ #define CAML_NAME_SPACE +#include #include #include -#include #ifdef _MSC_VER #include #endif -__attribute__((__always_inline__)) static inline unsigned int clz(unsigned int v) { +__attribute__((__always_inline__)) +static inline uintnat clz(uintnat v){ /* Note: on a 64 bit platform, GCC's _builtin_clz will perform a 32 bit operation (even if the argument has type int). We have to use _builtin_clzll instead. */ #if __GNUC__ -#ifdef ARCH_SIXTYFOUR - return __builtin_clzll(v); -#else - return __builtin_clz(v); -#endif + #ifdef ARCH_SIXTYFOUR + return __builtin_clzll(v); + #else + return __builtin_clz(v) + #endif #endif #ifdef _MSC_VER - int res = 0; -#ifdef ARCH_SIXTYFOUR - _BitScanReverse64(&res, v); -#else - _BitScanReverse(&res, v); -#endif - return res; -#endif + int res = 0; + #ifdef ARCH_SIXTYFOUR + _BitScanReverse64(&res,v); + #else + _BitScanReverse(&res,v); + #endif + return res; +#endif } /**************** Highest bit ****************/ -CAMLprim uintnat caml_int_builtin_highest_bit(value i) { - /* log2(v) is normally 32-1-clz(v), but because of the tag we - must substract one more. */ - return (1 << (8 * sizeof(value) - 2 - clz(i))); + +CAMLprim uintnat caml_int_builtin_highest_bit (value i){ + /* printf("Highest bit In C: %x %x %x %x\n", */ + /* i, i >> 1, 62-clz(i), 1 << (62 - clz(i))); */ + /* fflush(stdout); */ + return ((uintnat) 1 << (8*sizeof(value) - 2 - clz(i))); } From 322329c2528006853a44efaa76a00eca21597a29 Mon Sep 17 00:00:00 2001 From: Dorian Lesbre Date: Mon, 29 Apr 2024 11:08:22 +0200 Subject: [PATCH 06/14] Fix int_builtins from codex's fix --- int_builtins.c | 4 ++++ patriciaTree.ml | 54 ++++++++++++++++++++++++++------------------- patriciaTreeTest.ml | 49 +++++++++++++++++++++++++++++++++------- 3 files changed, 76 insertions(+), 31 deletions(-) diff --git a/int_builtins.c b/int_builtins.c index 781725c..3ed37b3 100644 --- a/int_builtins.c +++ b/int_builtins.c @@ -59,3 +59,7 @@ CAMLprim uintnat caml_int_builtin_highest_bit (value i){ /* fflush(stdout); */ return ((uintnat) 1 << (8*sizeof(value) - 2 - clz(i))); } + +CAMLprim value caml_int_builtin_highest_bit_byte (value i){ + return Val_int(caml_int_builtin_highest_bit(i)); +} diff --git a/patriciaTree.ml b/patriciaTree.ml index c8e8cd9..7a0c9e4 100644 --- a/patriciaTree.ml +++ b/patriciaTree.ml @@ -373,9 +373,17 @@ end external highest_bit: int -> (int[@untagged]) = "caml_int_builtin_highest_bit_byte" "caml_int_builtin_highest_bit" [@@noalloc] +let unsigned_lt x y = x - min_int < y - min_int + (* if x >= 0 && y >= 0 + then x < y + else if x >= 0 + then (* pos < neg *) true + else if y >= 0 then false + else x < y *) + (** Note: in the original version, okasaki give the masks as arguments to optimize the computation of highest_bit. *) -let branching_bit a b = highest_bit (a lxor b);; +let branching_bit a b = highest_bit (a lxor b) let mask i m = i land (lnot (2*m-1)) @@ -640,12 +648,12 @@ module MakeCustomHeterogeneous match Key.polyeq key split_key with | Eq -> NODE.empty, Some value, NODE.empty | Diff -> - if Key.to_int key < split_key_int then + if unsigned_lt (Key.to_int key) split_key_int then m, None, NODE.empty else NODE.empty, None, m end | Branch{prefix;branching_bit;tree0;tree1} -> if not (match_prefix split_key_int prefix branching_bit) then - if prefix < split_key_int + if unsigned_lt prefix split_key_int then m, None, NODE.empty else NODE.empty, None, m else if (branching_bit land split_key_int == 0) then @@ -762,7 +770,7 @@ module MakeCustomHeterogeneous in (res,restree) | Empty -> (* Can only happen in weak sets and maps. *) - raise Disappeared ;; + raise Disappeared let pop_minimum m = match NODE.view m with | Empty -> None | _ -> Some(pop_min_nonempty m) @@ -944,7 +952,7 @@ module MakeCustomHeterogeneous (reflexive_subset_domain_for_all2 f ta0 tb0) && (reflexive_subset_domain_for_all2 f ta1 tb1) (* Case where ta have to be included in one of tb0 or tb1. *) - else if ma < mb && match_prefix pa pb mb + else if unsigned_lt ma mb && match_prefix pa pb mb then if mb land pa == 0 then reflexive_subset_domain_for_all2 f ta tb0 else reflexive_subset_domain_for_all2 f ta tb1 @@ -962,11 +970,11 @@ module MakeCustomHeterogeneous if ma == mb && pa == pb (* Same prefix: check both subtrees *) then disjoint ta0 tb0 && disjoint ta1 tb1 - else if ma > mb && match_prefix pb pa ma (* tb included in ta0 or ta1 *) + else if unsigned_lt mb ma && match_prefix pb pa ma (* tb included in ta0 or ta1 *) then if ma land pb == 0 then disjoint ta0 tb else disjoint ta1 tb - else if ma < mb && match_prefix pa pb mb (* ta included in tb0 or tb1 *) + else if unsigned_lt ma mb && match_prefix pa pb mb (* ta included in tb0 or tb1 *) then if mb land pa == 0 then disjoint ta tb0 else disjoint ta tb1 @@ -991,11 +999,11 @@ module MakeCustomHeterogeneous let tree0 = idempotent_union f ta0 tb0 in let tree1 = idempotent_union f ta1 tb1 in branch ~prefix:pa ~branching_bit:ma ~tree0 ~tree1 - else if ma > mb && match_prefix pb pa ma + else if unsigned_lt mb ma && match_prefix pb pa ma then if ma land pb == 0 then branch ~prefix:pa ~branching_bit:ma ~tree0:(idempotent_union f ta0 tb) ~tree1:ta1 else branch ~prefix:pa ~branching_bit:ma ~tree0:ta0 ~tree1:(idempotent_union f ta1 tb) - else if ma < mb && match_prefix pa pb mb + else if unsigned_lt ma mb && match_prefix pa pb mb then if mb land pa == 0 then branch ~prefix:pb ~branching_bit:mb ~tree0:(idempotent_union f ta tb0) ~tree1:tb1 else branch ~prefix:pb ~branching_bit:mb ~tree0:tb0 ~tree1:(idempotent_union f ta tb1) @@ -1028,11 +1036,11 @@ module MakeCustomHeterogeneous let tree0 = idempotent_inter f ta0 tb0 in let tree1 = idempotent_inter f ta1 tb1 in branch ~prefix:pa ~branching_bit:ma ~tree0 ~tree1 - else if ma > mb && match_prefix pb pa ma + else if unsigned_lt mb ma && match_prefix pb pa ma then if ma land pb == 0 then idempotent_inter f ta0 tb else idempotent_inter f ta1 tb - else if ma < mb && match_prefix pa pb mb + else if unsigned_lt ma mb && match_prefix pa pb mb then if mb land pa == 0 then idempotent_inter f ta tb0 else idempotent_inter f ta tb1 @@ -1058,11 +1066,11 @@ module MakeCustomHeterogeneous let tree0 = nonidempotent_inter_no_share f ta0 tb0 in let tree1 = nonidempotent_inter_no_share f ta1 tb1 in branch ~prefix:pa ~branching_bit:ma ~tree0 ~tree1 - else if ma > mb && match_prefix pb pa ma + else if unsigned_lt mb ma && match_prefix pb pa ma then if ma land pb == 0 then nonidempotent_inter_no_share f ta0 tb else nonidempotent_inter_no_share f ta1 tb - else if ma < mb && match_prefix pa pb mb + else if unsigned_lt ma mb && match_prefix pa pb mb then if mb land pa == 0 then nonidempotent_inter_no_share f ta tb0 else nonidempotent_inter_no_share f ta tb1 @@ -1097,11 +1105,11 @@ module MakeCustomHeterogeneous let tree0 = idempotent_inter_filter f ta0 tb0 in let tree1 = idempotent_inter_filter f ta1 tb1 in branch ~prefix:pa ~branching_bit:ma ~tree0 ~tree1 - else if ma > mb && match_prefix pb pa ma + else if unsigned_lt mb ma && match_prefix pb pa ma then if ma land pb == 0 then idempotent_inter_filter f ta0 tb else idempotent_inter_filter f ta1 tb - else if ma < mb && match_prefix pa pb mb + else if unsigned_lt ma mb && match_prefix pa pb mb then if mb land pa == 0 then idempotent_inter_filter f ta tb0 else idempotent_inter_filter f ta tb1 @@ -1152,11 +1160,11 @@ module MakeCustomHeterogeneous (* Same prefix: merge the subtrees *) then branch ~prefix:pa ~branching_bit:ma ~tree0:(slow_merge f ta0 tb0) ~tree1:(slow_merge f ta1 tb1) - else if ma > mb && match_prefix pb pa ma + else if unsigned_lt mb ma && match_prefix pb pa ma then if ma land pb == 0 then branch ~prefix:pa ~branching_bit:ma ~tree0:(slow_merge f ta0 tb) ~tree1:(upd_ta ta1) else branch ~prefix:pa ~branching_bit:ma ~tree0:(upd_ta ta0) ~tree1:(slow_merge f ta1 tb) - else if ma < mb && match_prefix pa pb mb + else if unsigned_lt ma mb && match_prefix pa pb mb then if mb land pa == 0 then branch ~prefix:pb ~branching_bit:mb ~tree0:(slow_merge f ta tb0) ~tree1:(upd_tb tb1) else branch ~prefix:pb ~branching_bit:mb ~tree0:(upd_tb tb0) ~tree1:(slow_merge f ta tb1) @@ -1212,11 +1220,11 @@ module MakeCustomHeterogeneous if(ta0 == tree0 && ta1 == tree1) then ta else NODE.branch ~prefix:pa ~branching_bit:ma ~tree0 ~tree1 - else if ma > mb && match_prefix pb pa ma + else if unsigned_lt mb ma && match_prefix pb pa ma then if ma land pb == 0 then nonidempotent_inter f ta0 tb else nonidempotent_inter f ta1 tb - else if ma < mb && match_prefix pa pb mb + else if unsigned_lt ma mb && match_prefix pa pb mb then if mb land pa == 0 then nonidempotent_inter f ta tb0 else nonidempotent_inter f ta tb1 @@ -1259,7 +1267,7 @@ module MakeCustomHeterogeneous let tree1 = update_multiple_from_foreign tb1 f ta1 in if tree0 == ta0 && tree1 == ta1 then ta else branch ~prefix:pa ~branching_bit:ma ~tree0 ~tree1 - else if ma > mb && match_prefix pb pa ma + else if unsigned_lt mb ma && match_prefix pb pa ma then if ma land pb == 0 then let ta0' = update_multiple_from_foreign tb f ta0 in @@ -1269,7 +1277,7 @@ module MakeCustomHeterogeneous let ta1' = update_multiple_from_foreign tb f ta1 in if ta1' == ta1 then ta else branch ~prefix:pa ~branching_bit:ma ~tree0:ta0 ~tree1:ta1' - else if ma < mb && match_prefix pa pb mb + else if unsigned_lt ma mb && match_prefix pa pb mb then if mb land pa == 0 then let tree0 = update_multiple_from_foreign tb0 f ta in @@ -1309,7 +1317,7 @@ module MakeCustomHeterogeneous let tree1 = update_multiple_from_inter_with_foreign tb1 f ta1 in if tree0 == ta0 && tree1 == ta1 then ta else branch ~prefix:pa ~branching_bit:ma ~tree0 ~tree1 - else if ma > mb && match_prefix pb pa ma + else if unsigned_lt mb ma && match_prefix pb pa ma then if ma land pb == 0 then let ta0' = update_multiple_from_inter_with_foreign tb f ta0 in @@ -1319,7 +1327,7 @@ module MakeCustomHeterogeneous let ta1' = update_multiple_from_inter_with_foreign tb f ta1 in if ta1' == ta1 then ta else branch ~prefix:pa ~branching_bit:ma ~tree0:ta0 ~tree1:ta1' - else if ma < mb && match_prefix pa pb mb + else if unsigned_lt ma mb && match_prefix pa pb mb then if mb land pa == 0 then update_multiple_from_inter_with_foreign tb0 f ta else update_multiple_from_inter_with_foreign tb1 f ta diff --git a/patriciaTreeTest.ml b/patriciaTreeTest.ml index 843bbdc..398fa2b 100644 --- a/patriciaTreeTest.ml +++ b/patriciaTreeTest.ml @@ -145,12 +145,22 @@ end) (* let _m5 = inter (fun a b -> a) _m2 _m3;; *) (* let _m6 = inter (fun a b -> a) _m1 _m2;; *) +let unsigned_compare x y = + if x >= 0 && y >= 0 + then compare x y + else if x >= 0 + then (* pos < neg *) -1 + else if y >= 0 then 1 + else compare x y let%test_module _ = (module struct (* A model. *) module IntMap = struct - module M = Map.Make(Int) + module M = Map.Make(struct + type t = int + let compare = unsigned_compare + end) include M let subset_domain_for_all_2 m1 m2 f = let exception False in @@ -252,10 +262,12 @@ let%test_module _ = (module struct let third = extend_map first alist3 in (second,third) + let nat_gen = QCheck.int + let gen = QCheck.(triple - (small_list (pair small_nat small_nat)) - (small_list (pair small_nat small_nat)) - (small_list (pair small_nat small_nat)));; + (small_list (pair nat_gen nat_gen)) + (small_list (pair nat_gen nat_gen)) + (small_list (pair nat_gen nat_gen)));; let model_from_gen x = let (m1,m2) = two_maps_from_three_lists x in @@ -281,7 +293,7 @@ let%test_module _ = (module struct module Foreign = MyMap.WithForeign(MyMap.BaseMap) let test_pop_minimum = QCheck.Test.make ~count:1000 ~name:"pop_minimum" - QCheck.(small_list (pair small_nat small_nat)) (fun x -> + QCheck.(small_list (pair nat_gen nat_gen)) (fun x -> let m = extend_map MyMap.empty x in let model = intmap_of_mymap m in match MyMap.pop_minimum m, IntMap.pop_minimum model with @@ -292,7 +304,7 @@ let%test_module _ = (module struct let () = QCheck.Test.check_exn test_pop_minimum let test_pop_maximum = QCheck.Test.make ~count:1000 ~name:"pop_maximum" - QCheck.(small_list (pair small_nat small_nat)) (fun x -> + QCheck.(small_list (pair nat_gen nat_gen)) (fun x -> let m = extend_map MyMap.empty x in let model = intmap_of_mymap m in match MyMap.pop_maximum m, IntMap.pop_maximum model with @@ -310,7 +322,7 @@ let%test_module _ = (module struct let () = match !seen with | None -> () | Some old_key_int -> - if old_key_int < key_int + if unsigned_compare old_key_int key_int < 0 then () else QCheck.Test.fail_reportf "Non increasing calls to f : key %d seen after %d" @@ -330,7 +342,7 @@ let%test_module _ = (module struct in f let test_map_filter = QCheck.Test.make ~count:1000 ~name:"map_filter" - QCheck.(small_list (pair small_nat small_nat)) (fun x -> + QCheck.(small_list (pair nat_gen nat_gen)) (fun x -> let m1 = extend_map MyMap.empty x in let model1 = intmap_of_mymap m1 in let chk_calls1 = check_increases () in @@ -490,6 +502,27 @@ let%test_module _ = (module struct (* Printf.printf "res is %b\n%!" @@ IntMap.equal (=) modelres myres; *) modelres == myres) let () = QCheck.Test.check_exn test_disjoint + + let%test "negative_keys" = + let map = MyMap.add 0 0 MyMap.empty in + let _pp_l fmt = Format.pp_print_list ~pp_sep:(fun fmt () -> Format.fprintf fmt "; ") + (fun fmt (k,l) -> Format.fprintf fmt "(%x, %x)" k l) fmt in + let map2 = MyMap.add min_int 5 map in + let map3 = MyMap.add max_int 8 map2 in + let map4 = MyMap.add 25 8 map2 in + let map5 = MyMap.idempotent_inter_filter (fun _ _ _ -> None) map3 map4 in + (* Format.printf "[%a]@." pp_l (MyMap.to_list map3); + Format.printf "[%a]@." pp_l (MyMap.to_list map4); + Format.printf "[%a]@." pp_l (MyMap.to_list map5); + (match MyMap.BaseMap.view map3 with + | Branch{prefix; branching_bit; _} -> Format.printf "%x : %x@." (Obj.magic prefix) (Obj.magic branching_bit) + | _ -> () + ); *) + MyMap.to_list map = [(0,0)] && + MyMap.to_list map2 = [(0,0); (min_int,5)] && + MyMap.to_list map3 = [(0,0); (max_int,8); (min_int,5)] && + MyMap.to_list map4 = [(0,0); (25,8); (min_int,5)] && + MyMap.to_list map5 = MyMap.to_list map2 end) From c372b6e01b187964255430be956f8438c5064300 Mon Sep 17 00:00:00 2001 From: Dorian Lesbre Date: Mon, 29 Apr 2024 11:39:23 +0200 Subject: [PATCH 07/14] Explicit order in doc --- index.mld | 3 +- patriciaTree.mli | 194 +++++++++++++++++++++++++++----------------- patriciaTreeTest.ml | 8 +- 3 files changed, 125 insertions(+), 80 deletions(-) diff --git a/index.mld b/index.mld index e1deba6..44b9b4f 100644 --- a/index.mld +++ b/index.mld @@ -40,7 +40,8 @@ dune build @doc {1 Features} {ul -{li Similar to OCaml's [Map] and [Set], using the same function names when possible +{li Similar to OCaml's {{: https://ocaml.org/api/Map.S.html}[Map]} and {{: https://ocaml.org/api/Set.S.html}[Set]}, + using the same function names when possible and the same convention for order of arguments. This should allow switching to and from Patricia Tree with minimal effort.} {li The functor parameters ({!PatriciaTree.KEY} module) requires an injective [to_int : t -> int] diff --git a/patriciaTree.mli b/patriciaTree.mli index 454d866..f59a303 100644 --- a/patriciaTree.mli +++ b/patriciaTree.mli @@ -66,6 +66,30 @@ type intkey type mask +val unsigned_lt : int -> int -> bool +(** All integers comparisons in this library are done according to their + {b unsigned representation}. This is the same as signed comparison for same + sign integers, but all negative integers are greater than the positives. + This means [-1] is the greatest possible number, and [0] is the smallest. + {[ + # unsigned_lt 2 (-1);; + - bool : true + # unsigned_lt max_int min_int;; + - bool : true + # unsigned_lt 3 2;; + - bool : false + # unsigned_lt 2 3;; + - bool : true + # unsigned_lt (-2) (-3);; + - bool : false + # unsigned_lt (-4) (-3);; + - bool : true + # unsigned_lt 0 0;; + - bool : false + ]} + + @since 0.10.0 *) + (** {1 Nodes} *) (** This module explains how a node is stored in memory, with @@ -167,12 +191,17 @@ module type BASE_MAP = sig (** {3 Basic functions} *) val min_binding : 'a t -> 'a key_value_pair - (** @raises Not_found if the map is empty *) + (** [min_binding m] is minimal binding [KeyValue(k,v)] of the map, + using the {{!unsigned_lt}unsigned order} on [Key.to_int]. + @raises Not_found if the map is empty *) val max_binding : 'a t -> 'a key_value_pair - (** @raises Not_found if the map is empty *) + (** [max_binding m] is maximal binding [KeyValue(k,v)] of the map, + using the {{!unsigned_lt}unsigned order} on [Key.to_int]. + @raises Not_found if the map is empty *) val singleton : 'a key -> ('a, 'b) value -> 'b t + (** Create a map with a single binding. *) val cardinal : 'a t -> int (** The size of the map, O(n) complexity *) @@ -182,10 +211,11 @@ module type BASE_MAP = sig [m] contains a unique binding [k->v]. *) val find : 'key key -> 'map t -> ('key, 'map) value - (** @raises Not_found if key is absent from map *) + (** [find key map] returns the value associated with [key] in [map] if present. + @raises Not_found if [key] is absent from map *) val find_opt : 'key key -> 'map t -> ('key, 'map) value option - (** Same as [find], but returns [None] for Not_found *) + (** Same as {!find}, but returns [None] for Not_found *) val mem : 'key key -> 'map t -> bool (** [mem key map] returns [true] iff [key] is bound in [map], O(log(n)) complexity. *) @@ -196,11 +226,15 @@ module type BASE_MAP = sig val pop_minimum: 'map t -> ('map key_value_pair * 'map t) option (** [pop_minimum m] returns [None] if [is_empty m], or [Some(key,value,m')] where - [(key,value) = min_binding m] and [m' = remove m key]. O(log(n)) complexity. *) + [(key,value) = min_binding m] and [m' = remove m key]. + Uses the {{!unsigned_lt}unsigned order} on [Key.to_int]. + O(log(n)) complexity. *) val pop_maximum: 'map t -> ('map key_value_pair * 'map t) option (** [pop_maximum m] returns [None] if [is_empty m], or [Some(key,value,m')] where - [(key,value) = max_binding m] and [m' = remove m key]. O(log(n)) complexity. *) + [(key,value) = max_binding m] and [m' = remove m key]. + Uses the {{!unsigned_lt}unsigned order} on [Key.to_int]. + O(log(n)) complexity. *) val insert: 'a key -> (('a,'map) value option -> ('a,'map) value) -> 'map t -> 'map t (** [insert key f map] modifies or insert an element of the map; [f] @@ -230,23 +264,24 @@ module type BASE_MAP = sig - submap of [map] whose keys are smaller than [key] - value associated to [key] (if present) - submap of [map] whose keys are bigger than [key] - Where the order is given by [Key.to_int]. *) + Where the order is given by the {{!unsigned_lt}unsigned order} on [Key.to_int]. *) type 'map polyiter = { f : 'a. 'a key -> ('a, 'map) value -> unit; } [@@unboxed] val iter : 'map polyiter -> 'map t -> unit - (** [iter f m] calls [f.f] on all bindings of [m], in the order given by [Key.to_int] *) + (** [iter f m] calls [f.f] on all bindings of [m], + in the {{!unsigned_lt}unsigned order} on [Key.to_int] *) type ('acc,'map) polyfold = { f: 'a. 'a key -> ('a,'map) value -> 'acc -> 'acc } [@@unboxed] val fold : ('acc,'map) polyfold -> 'map t -> 'acc -> 'acc (** [fold f m acc] returns [f.f key_n value_n (... (f.f key_1 value_1 acc))] where [(key_1, value_1) ... (key_n, value_n)] are the bindings of [m], in - the order given by [Key.to_int]. *) + the {{!unsigned_lt}unsigned order} on [Key.to_int]. *) type 'map polypredicate = { f: 'a. 'a key -> ('a,'map) value -> bool; } [@@unboxed] val filter : 'map polypredicate -> 'map t -> 'map t (** [filter f m] returns the submap of [m] containing the bindings [k->v] such that [f.f k v = true]. - [f.f] is called in the order given by [Key.to_int] *) + [f.f] is called in the {{!unsigned_lt}unsigned order} of [Key.to_int] *) val for_all : 'map polypredicate -> 'map t -> bool (** [for_all f m] checks that [f] holds on all bindings of [m]7 @@ -262,14 +297,14 @@ module type BASE_MAP = sig val map : ('map,'map) polymap -> 'map t -> 'map t val map_no_share : ('map1,'map2) polymap -> 'map1 t -> 'map2 t (** [map f m] and [map_no_share f m] replace all bindings [(k,v)] by [(k, f.f v)]. - Bindings are examined in the order given by [Key.to_int]. *) + Bindings are examined in the {{!unsigned_lt}unsigned order} of [Key.to_int]. *) type ('map1,'map2) polymapi = { f : 'a. 'a key -> ('a, 'map1) value -> ('a, 'map2) value; } [@@unboxed] val mapi : ('map,'map) polymapi -> 'map t -> 'map t val mapi_no_share : ('map1,'map2) polymapi -> 'map1 t -> 'map2 t (** [mapi f m] and [mapi_no_share f m] replace all bindings [(k,v)] by [(k, f.f k v)]. - Bindings are examined in the order given by [Key.to_int]. *) + Bindings are examined in the {{!unsigned_lt}unsigned order} of [Key.to_int]. *) type ('map1,'map2) polyfilter_map = { f : 'a. 'a key -> ('a, 'map1) value -> ('a, 'map2) value option; } [@@unboxed] @@ -278,7 +313,7 @@ module type BASE_MAP = sig (** [filter_map m f] and [filter_map_no_share m f] remove the bindings [(k,v)] for which [f.f k v] is [None], and replaces the bindings [(k,v)] for which [f.f k v] is [Some v'] by [(k,v')]. - Bindings are examined in the order given by [Key.to_int]. *) + Bindings are examined in the {{!unsigned_lt}unsigned order} of [Key.to_int]. *) type 'map polypretty = { f: 'a. Format.formatter -> 'a key -> ('a, 'map) value -> unit } [@@unboxed] val pretty : @@ -287,7 +322,7 @@ module type BASE_MAP = sig (** Pretty-prints a map using the given formatter. [pp_sep] is called once between each binding, it defaults to [Format.pp_print_cut]. - Bindings are printed in the order given by [Key.to_int] *) + Bindings are printed in the {{!unsigned_lt}unsigned order} of [Key.to_int] *) (** {3 Functions on pairs of maps} *) @@ -300,7 +335,7 @@ module type BASE_MAP = sig - [m1] and [m2] have the same domain (set of keys) - for all bindings [(k, v1)] in [m1] and [(k, v2)] in [m2], [f.f k v1 v2] holds @assumes [f.f] is reflexive, i.e. [f.f k v v = true] to skip calls to equal subtrees. - Calls [f.f] in ascending order of [Key.to_int]. + Calls [f.f] in ascending {{!unsigned_lt}unsigned order} of [Key.to_int]. Exits early if the domains mismatch. It is useful to implement equality on maps: @@ -315,7 +350,7 @@ module type BASE_MAP = sig ('map1,'map2) polysame_domain_for_all2 -> 'map1 t -> 'map2 t -> bool (** [nonreflexive_same_domain_for_all2 f m1 m2] is the same as {!reflexive_same_domain_for_all2}, but doesn't assume [f.f] is reflexive. - It thus calls [f.f] on every binding, in ascending order of [Key.to_int]. + It thus calls [f.f] on every binding, in ascending {{!unsigned_lt}unsigned order} of [Key.to_int]. Exits early if the domains mismatch. *) val reflexive_subset_domain_for_all2 : @@ -324,7 +359,7 @@ module type BASE_MAP = sig - [m1]'s domain is a subset of [m2]'s. (all keys defined in [m1] are also defined in [m2]) - for all bindings [(k, v1)] in [m1] and [(k, v2)] in [m2], [f.f k v1 v2] holds @assumes [f.f] is reflexive, i.e. [f.f k v v = true] to skip calls to equal subtrees. - Calls [f.f] in ascending order of [Key.to_int]. + Calls [f.f] in ascending {{!unsigned_lt}unsigned order} of [Key.to_int]. Exits early if the domains mismatch. *) type ('map1, 'map2, 'map3) polyunion = { @@ -334,7 +369,7 @@ module type BASE_MAP = sig union of the keys of [map1] and [map2]. [f.f] is used to combine the values of keys mapped in both maps. @assumes [f.f] idempotent (i.e. [f key value value == value]) - [f.f] is called in the order given by [Key.to_int]. + [f.f] is called in the {{!unsigned_lt}unsigned order} of [Key.to_int]. [f.f] is never called on physically equal values. Preserves physical equality as much as possible. Complexity is O(log(n)*Delta) where Delta is the number of @@ -348,7 +383,7 @@ module type BASE_MAP = sig intersection of the keys of [map1] and [map2]. [f.f] is used to combine the values a key is mapped in both maps. @assumes [f.f] idempotent (i.e. [f key value value == value]) - [f.f] is called in the order given by [Key.to_int]. + [f.f] is called in the {{!unsigned_lt}unsigned order} of [Key.to_int]. [f.f] is never called on physically equal values. Preserves physical equality as much as possible. Complexity is O(log(n)*Delta) where Delta is the number of @@ -358,7 +393,7 @@ module type BASE_MAP = sig (** [nonidempotent_inter_no_share f map1 map2] is the same as {!idempotent_inter} but doesn't preverse physical equality, doesn't assume [f.f] is idempotent, and can change the type of values. [f.f] is called on every shared binding. - [f.f] is called in increasing order of keys. + [f.f] is called in increasing {{!unsigned_lt}unsigned order} of keys. O(n) complexity *) @@ -378,10 +413,10 @@ module type BASE_MAP = sig (** {3 Conversion functions} *) val to_seq : 'a t -> 'a key_value_pair Seq.t - (** [to_seq m] iterates the whole map, in increasing order of [Key.to_int] *) + (** [to_seq m] iterates the whole map, in increasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) val to_rev_seq : 'a t -> 'a key_value_pair Seq.t - (** [to_rev_seq m] iterates the whole map, in decreasing order of [Key.to_int] *) + (** [to_rev_seq m] iterates the whole map, in decreasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) val add_seq : 'a key_value_pair Seq.t -> 'a t -> 'a t (** [add_seq s m] adds all bindings of the sequence [s] to [m] in order. *) @@ -395,7 +430,7 @@ module type BASE_MAP = sig If a key is bound multiple times in [l], the latest binding is kept *) val to_list : 'a t -> 'a key_value_pair list - (** [to_list m] returns the bindings of [m] as a list, in increasing order of [Key.to_int] *) + (** [to_list m] returns the bindings of [m] as a list, in increasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) end (** {2 Heterogeneous maps and sets} *) @@ -439,7 +474,7 @@ module type HETEROGENEOUS_MAP = sig i.e. [update_multiple_from_foreign m_from f m_to] calls [f.f] on every key of [m_from], says if the corresponding value also exists in [m_to], and adds or remove the element in [m_to] depending on the value of [f.f]. - [f.f] is called in the order of [Key.to_int]. + [f.f] is called in the {{!unsigned_lt}unsigned order} of [Key.to_int]. O(size(m_from) + size(m_to)) complexity. *) type ('map1,'map2) polyupdate_multiple_inter = { f: 'a. 'a key -> ('a,'map1) value -> ('a,'map2) Map2.value -> ('a,'map1) value option } [@@unboxed] @@ -510,20 +545,24 @@ module type HETEROGENEOUS_SET = sig Returns a value physically equal to [set] if [elt] is not present. *) val min_elt: t -> any_elt - (** The minimal element if non empty. + (** The minimal element if non empty, according to the + {{!unsigned_lt}unsigned order} on elements. @raises Not_found *) val max_elt: t -> any_elt - (** The maximal element if non empty. + (** The maximal element if non empty, according to the + {{!unsigned_lt}unsigned order} on elements. @raises Not_found *) val pop_minimum: t -> (any_elt * t) option (** [pop_minimum s] is [Some (elt, s')] where [elt = min_elt s] and [s' = remove elt s] - if [s] is non empty. *) + if [s] is non empty. + Uses the {{!unsigned_lt}unsigned order} on elements. *) val pop_maximum: t -> (any_elt * t) option (** [pop_maximum s] is [Some (elt, s')] where [elt = max_elt s] and [s' = remove elt s] - if [s] is non empty. *) + if [s] is non empty. + Uses the {{!unsigned_lt}unsigned order} on elements. *) (** {3 Functions on pairs of sets} *) @@ -547,27 +586,28 @@ module type HETEROGENEOUS_SET = sig val split: 'a elt -> t -> t * bool * t (** [split elt set] returns [s_lt, present, s_gt] where [s_lt] contains all elements of [set] smaller than [elt], [s_gt] - all those greater than [elt], and [present] is [true] if [elt] is in [set]. *) + all those greater than [elt], and [present] is [true] if [elt] is in [set]. + Uses the {{!unsigned_lt}unsigned order} on elements. *) (** {3 Iterators} *) type polyiter = { f: 'a. 'a elt -> unit; } [@@unboxed] val iter: polyiter -> t -> unit - (** [iter f set] calls [f.f] on all elements of [set], in order of [Key.to_int]. *) + (** [iter f set] calls [f.f] on all elements of [set], in the {{!unsigned_lt}unsigned order} of [Key.to_int]. *) type polypredicate = { f: 'a. 'a elt -> bool; } [@@unboxed] val filter: polypredicate -> t -> t (** [filter f set] is the subset of [set] that only contains the elements that - satisfy [f.f]. [f.f] is called in order of [Key.to_int]. *) + satisfy [f.f]. [f.f] is called in the {{!unsigned_lt}unsigned order} of [Key.to_int]. *) val for_all: polypredicate -> t -> bool (** [for_all f set] is [true] if [f.f] is [true] on all elements of [set]. - Short-circuits on first [false]. [f.f] is called in order of [Key.to_int]. *) + Short-circuits on first [false]. [f.f] is called in the {{!unsigned_lt}unsigned order} of [Key.to_int]. *) type 'acc polyfold = { f: 'a. 'a elt -> 'acc -> 'acc } [@@unboxed] val fold: 'acc polyfold -> t -> 'acc -> 'acc (** [fold f set acc] returns [f.f elt_n (... (f.f elt_1 acc) ...)], where - [elt_1, ..., elt_n] are the elements of [set], in increasing order of + [elt_1, ..., elt_n] are the elements of [set], in increasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) type polypretty = { f: 'a. Format.formatter -> 'a elt -> unit; } [@@unboxed] @@ -579,10 +619,10 @@ module type HETEROGENEOUS_SET = sig (** {3 Conversion functions} *) val to_seq : t -> any_elt Seq.t - (** [to_seq st] iterates the whole set, in increasing order of [Key.to_int] *) + (** [to_seq st] iterates the whole set, in increasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) val to_rev_seq : t -> any_elt Seq.t - (** [to_rev_seq st] iterates the whole set, in decreasing order of [Key.to_int] *) + (** [to_rev_seq st] iterates the whole set, in decreasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) val add_seq : any_elt Seq.t -> t -> t (** [add_seq s st] adds all elements of the sequence [s] to [st] in order. *) @@ -594,7 +634,7 @@ module type HETEROGENEOUS_SET = sig (** [of_list l] creates a new set from the elements of [l]. *) val to_list : t -> any_elt list - (** [to_list s] returns the elements of [s] as a list, in increasing order of [Key.to_int] *) + (** [to_list s] returns the elements of [s] as a list, in increasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) end @@ -648,43 +688,46 @@ module type SET = sig Returns a value physically equal to [set] if [elt] is not present. *) val min_elt: t -> elt - (** The minimal element if non empty. + (** The minimal element (according to the {{!unsigned_lt}unsigned order} on [Key.to_int]) if non empty. @raises Not_found *) val max_elt: t -> elt - (** The maximal element if non empty. + (** The maximal element (according to the {{!unsigned_lt}unsigned order} on [Key.to_int]) if non empty. @raises Not_found *) val pop_minimum: t -> (elt * t) option (** [pop_minimum s] is [Some (elt, s')] where [elt = min_elt s] and [s' = remove elt s] - if [s] is non empty. *) + if [s] is non empty. + Uses the {{!unsigned_lt}unsigned order} on [Key.to_int]. *) val pop_maximum: t -> (elt * t) option (** [pop_maximum s] is [Some (elt, s')] where [elt = max_elt s] and [s' = remove elt s] - if [s] is non empty. *) + if [s] is non empty. + Uses the {{!unsigned_lt}unsigned order} on [Key.to_int]. *) (** {3 Iterators} *) val iter: (elt -> unit) -> t -> unit - (** [iter f set] calls [f] on all elements of [set], in order of [Key.to_int]. *) + (** [iter f set] calls [f] on all elements of [set], in the {{!unsigned_lt}unsigned order} of [Key.to_int]. *) val filter: (elt -> bool) -> t -> t (** [filter f set] is the subset of [set] that only contains the elements that - satisfy [f]. [f] is called in order of [Key.to_int]. *) + satisfy [f]. [f] is called in the {{!unsigned_lt}unsigned order} of [Key.to_int]. *) val for_all: (elt -> bool) -> t -> bool (** [for_all f set] is [true] if [f] is [true] on all elements of [set]. - Short-circuits on first [false]. [f] is called in order of [Key.to_int]. *) + Short-circuits on first [false]. [f] is called in the {{!unsigned_lt}unsigned order} of [Key.to_int]. *) val fold: (elt -> 'acc -> 'acc) -> t -> 'acc -> 'acc (** [fold f set acc] returns [f elt_n (... (f elt_1 acc) ...)], where - [elt_1, ..., elt_n] are the elements of [set], in increasing order of + [elt_1, ..., elt_n] are the elements of [set], in increasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) val split: elt -> t -> t * bool * t (** [split elt set] returns [s_lt, present, s_gt] where [s_lt] contains all elements of [set] smaller than [elt], [s_gt] - all those greater than [elt], and [present] is [true] if [elt] is in [set]. *) + all those greater than [elt], and [present] is [true] if [elt] is in [set]. + Uses the {{!unsigned_lt}unsigned order} on [Key.to_int].*) val pretty : ?pp_sep:(Format.formatter -> unit -> unit) -> @@ -714,10 +757,10 @@ module type SET = sig (** {3 Conversion functions} *) val to_seq : t -> elt Seq.t - (** [to_seq st] iterates the whole set, in increasing order of [Key.to_int] *) + (** [to_seq st] iterates the whole set, in increasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) val to_rev_seq : t -> elt Seq.t - (** [to_rev_seq st] iterates the whole set, in decreasing order of [Key.to_int] *) + (** [to_rev_seq st] iterates the whole set, in decreasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) val add_seq : elt Seq.t -> t -> t (** [add_seq s st] adds all elements of the sequence [s] to [st] in order. *) @@ -729,7 +772,7 @@ module type SET = sig (** [of_list l] creates a new set from the elements of [l]. *) val to_list : t -> elt list - (** [to_list s] returns the elements of [s] as a list, in increasing order of [Key.to_int] *) + (** [to_list s] returns the elements of [s] as a list, in increasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) end (** The typechecker struggles with forall quantification on values if they @@ -764,12 +807,13 @@ module type MAP = sig (** Test if a map is empty; O(1) complexity. *) val min_binding : 'a t -> (key * 'a) - (** Returns the (key,value) where [Key.to_int key] is minimal (in - unsigned representation of integers); O(log n) complexity. + (** Returns the (key,value) where [Key.to_int key] is minimal (in the + {{!unsigned_lt}unsigned representation} of integers); O(log n) complexity. @raises Not_found if the map is empty *) val max_binding : 'a t -> (key * 'a) - (** Returns the (key,value) where [Key.to_int key] is maximal; O(log n) complexity. + (** Returns the (key,value) where [Key.to_int key] is maximal (in the + {{!unsigned_lt}unsigned representation} of integers); O(log n) complexity. @raises Not_found if the map is empty *) val singleton : key -> 'a -> 'a t @@ -796,11 +840,13 @@ module type MAP = sig val pop_minimum : 'a t -> (key * 'a * 'a t) option (** [pop_minimum m] returns [None] if [is_empty m], or [Some(key,value,m')] where - [(key,value) = min_binding m] and [m' = remove m key]. O(log(n)) complexity. *) + [(key,value) = min_binding m] and [m' = remove m key]. O(log(n)) complexity. + Uses the {{!unsigned_lt}unsigned order} on [Key.to_int]. *) val pop_maximum : 'a t -> (key * 'a * 'a t) option (** [pop_maximum m] returns [None] if [is_empty m], or [Some(key,value,m')] where - [(key,value) = max_binding m] and [m' = remove m key]. O(log(n)) complexity. *) + [(key,value) = max_binding m] and [m' = remove m key]. O(log(n)) complexity. + Uses the {{!unsigned_lt}unsigned order} on [Key.to_int]. *) val insert : key -> ('a option -> 'a) -> 'a t -> 'a t (** [insert key f map] modifies or insert an element of the map; [f] @@ -830,20 +876,21 @@ module type MAP = sig - submap of [map] whose keys are smaller than [key] - value associated to [key] (if present) - submap of [map] whose keys are bigger than [key] - Where the order is given by [Key.to_int]. *) + Using the {{!unsigned_lt}unsigned order} is given by [Key.to_int]. *) val iter : (key -> 'a -> unit) -> 'a t -> unit - (** Iterate on each (key,value) pair of the map, in increasing order of keys. *) + (** Iterate on each (key,value) pair of the map, in increasing {{!unsigned_lt}unsigned order} of keys. *) val fold : (key -> 'a -> 'acc -> 'acc) -> 'a t -> 'acc -> 'acc - (** Fold on each (key,value) pair of the map, in increasing order of keys. *) + (** Fold on each (key,value) pair of the map, in increasing {{!unsigned_lt}unsigned order} of keys. *) val filter : (key -> 'a -> bool) -> 'a t -> 'a t (** Returns the submap containing only the key->value pairs satisfying the - given predicate. [f] is called in increasing number of keys *) + given predicate. [f] is called in increasing {{!unsigned_lt}unsigned order} of keys. *) val for_all : (key -> 'a -> bool) -> 'a t -> bool - (** Returns true if the predicate holds on all map bindings. Short-circuiting *) + (** Returns true if the predicate holds on all map bindings. Short-circuiting. + [f] is called in increasing {{!unsigned_lt}unsigned order} of keys. *) (** In the following, the *no_share function allows taking arguments of different types (but cannot share subtrees of the map), while @@ -857,12 +904,12 @@ module type MAP = sig value is physically the same (i.e. [f key value == value] for all the keys in the subtree) are guaranteed to be physically equal to the original subtree. O(n) complexity. - [f] is called in increasing order of keys. *) + [f] is called in increasing {{!unsigned_lt}unsigned order} of keys. *) val map_no_share : ('a -> 'b) -> 'a t -> 'b t (** [map_no_share f m] returns a map where the [value] bound to each [key] is replaced by [f value]. O(n) complexity. - [f] is called in increasing order of keys. *) + [f] is called in increasing {{!unsigned_lt}unsigned order} of keys. *) val mapi : (key -> 'a -> 'a) -> 'a t -> 'a t (** [mapi f m] returns a map where the [value] bound to each [key] is @@ -870,12 +917,12 @@ module type MAP = sig value is physically the same (i.e. [f key value == value] for all the keys in the subtree) are guaranteed to be physically equal to the original subtree. O(n) complexity. - [f] is called in increasing order of keys. *) + [f] is called in increasing {{!unsigned_lt}unsigned order} of keys. *) val mapi_no_share : (key -> 'a -> 'b) -> 'a t -> 'b t (** [mapi_no_share f m] returns a map where the [value] bound to each [key] is replaced by [f key value]. O(n) complexity. - [f] is called in increasing order of keys. *) + [f] is called in increasing {{!unsigned_lt}unsigned order} of keys. *) val filter_map : (key -> 'a -> 'a option) -> 'a t -> 'a t (** [filter_map m f] returns a map where the [value] bound to each @@ -885,14 +932,14 @@ module type MAP = sig (i.e. [f key value = Some v] with [value == v] for all the keys in the subtree) are guaranteed to be physically equal to the original subtree. O(n) complexity. - [f] is called in increasing order of keys. *) + [f] is called in increasing {{!unsigned_lt}unsigned order} of keys. *) val filter_map_no_share : (key -> 'a -> 'b option) -> 'a t -> 'b t (** [filter_map m f] returns a map where the [value] bound to each [key] is removed (if [f key value] returns [None]), or is replaced by [v] ((if [f key value] returns [Some v]). O(n) complexity. - [f] is called in increasing order of keys. *) + [f] is called in increasing {{!unsigned_lt}unsigned order} of keys. *) (** {3 Operations on pairs of maps} *) @@ -947,7 +994,7 @@ module type MAP = sig preserve physical equality of the subtreess in that case. The complexity is O(log(n)*Delta) where Delta is the number of different keys between [map1] and [map2]. - [f] is called in increasing order of keys. + [f] is called in increasing {{!unsigned_lt}unsigned order} of keys. [f] is never called on physically equal values. *) val idempotent_inter : (key -> 'a -> 'a -> 'a) -> 'a t -> 'a t -> 'a t @@ -959,7 +1006,7 @@ module type MAP = sig preserve physical equality of the subtrees in that case. The complexity is O(log(n)*Delta) where Delta is the number of different keys between [map1] and [map2]. - [f] is called in increasing order of keys. + [f] is called in increasing {{!unsigned_lt}unsigned order} of keys. [f] is never called on physically equal values. *) val nonidempotent_inter_no_share : (key -> 'a -> 'b -> 'c) -> 'a t -> 'b t -> 'c t @@ -969,12 +1016,12 @@ module type MAP = sig need to be idempotent, which imply that we have to visit physically equal subtrees of [map1] and [map2]. The complexity is O(log(n)*min(|map1|,|map2|)). - [f] is called in increasing order of keys. + [f] is called in increasing {{!unsigned_lt}unsigned order} of keys. [f] is called on every shared binding. *) val idempotent_inter_filter : (key -> 'a -> 'a -> 'a option) -> 'a t -> 'a t -> 'a t - (** [idempotent_inter_filter f m1 m2] is like [idempotent_inter f m1 - m2] (assuming idempotence, using and preserving physically + (** [idempotent_inter_filter f m1 m2] is like {!idempotent_inter} + (assuming idempotence, using and preserving physically equal subtrees), but it also removes the key->value bindings for which [f] returns [None]. *) @@ -1011,7 +1058,7 @@ module type MAP = sig i.e. [update_multiple_from_foreign m_from f m_to] calls [f.f] on every key of [m_from], says if the corresponding value also exists in [m_to], and adds or remove the element in [m_to] depending on the value of [f.f]. - [f.f] is called in the order of [Key.to_int]. + [f.f] is called in the {{!unsigned_lt}unsigned order} of [Key.to_int]. O(size(m_from) + size(m_to)) complexity. *) @@ -1033,10 +1080,10 @@ module type MAP = sig (** {3 Conversion functions} *) val to_seq : 'a t -> (key * 'a) Seq.t - (** [to_seq m] iterates the whole map, in increasing order of [Key.to_int] *) + (** [to_seq m] iterates the whole map, in increasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) val to_rev_seq : 'a t -> (key * 'a) Seq.t - (** [to_rev_seq m] iterates the whole map, in decreasing order of [Key.to_int] *) + (** [to_rev_seq m] iterates the whole map, in decreasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) val add_seq : (key * 'a) Seq.t -> 'a t -> 'a t (** [add_seq s m] adds all bindings of the sequence [s] to [m] in order. *) @@ -1050,7 +1097,8 @@ module type MAP = sig If a key is bound multiple times in [l], the latest binding is kept *) val to_list : 'a t -> (key * 'a) list - (** [to_list m] returns the bindings of [m] as a list, in increasing order of [Key.to_int] *) + (** [to_list m] returns the bindings of [m] as a list, + in increasing {{!unsigned_lt}unsigned order} of [Key.to_int] *) end diff --git a/patriciaTreeTest.ml b/patriciaTreeTest.ml index 398fa2b..d66d9ed 100644 --- a/patriciaTreeTest.ml +++ b/patriciaTreeTest.ml @@ -146,12 +146,8 @@ end) (* let _m6 = inter (fun a b -> a) _m1 _m2;; *) let unsigned_compare x y = - if x >= 0 && y >= 0 - then compare x y - else if x >= 0 - then (* pos < neg *) -1 - else if y >= 0 then 1 - else compare x y + if unsigned_lt x y then -1 + else if x = y then 0 else 1 let%test_module _ = (module struct From 32253d2987c44878e7fd720c69881960bab7b165 Mon Sep 17 00:00:00 2001 From: Dorian Lesbre Date: Mon, 29 Apr 2024 13:45:54 +0200 Subject: [PATCH 08/14] Some more documentation --- CHANGELOG.md | 2 +- README.md | 14 +++++++++----- index.mld | 16 +++++++++++----- patriciaTree.mli | 16 +++++++++++++--- 4 files changed, 34 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 134534f..39b2719 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ - Fixed a bug where NodeWithId wasn't incrementing ids properly - `zarith` is no longer a dependency, used GCC's `__builtin_clz` as a faster - method of finding an integer's highest bit + method of finding an integer's highest bit. # v0.9.0 - 2024-04-18 diff --git a/README.md b/README.md index 331e581..ce0c9c9 100644 --- a/README.md +++ b/README.md @@ -59,8 +59,7 @@ dune build @doc and the same convention for order of arguments. This should allow switching to and from Patricia Tree with minimal effort. - The functor parameters (`KEY` module) requires an injective `to_int : t -> int` - function instead of a `compare` function. `to_int` should be fast, injective, - and only return positive integers. + function instead of a `compare` function. `to_int` should be fast and injective. This works well with [hash-consed](https://en.wikipedia.org/wiki/Hash_consing) types. - The Patricia Tree representation is stable, contrary to maps, inserting nodes in any order will return the same shape. @@ -77,13 +76,19 @@ dune build @doc for the general one) - Since our Patricia Tree use big-endian order on keys, the maps and sets are - sorted in increasing order of keys. We only support positive integer keys. + sorted in increasing **unsigned order** of keys. + This means negative keys are sorted above positive keys, with `-1` being the + largest possible key, and `0` the smallest. This also avoids a bug in Okasaki's paper discussed in [*QuickChecking Patricia Trees*](https://www.cs.tufts.edu/comp/150FP/archive/jan-midtgaard/qc-patricia.pdf) by Jan Mitgaard. + + It also affects functions like `min_binding` and `pop_minimum`. They will return the smallest + positive integer of both positive and negative keys are present; and not the smallest negative, + as one might expect. - Supports generic maps and sets: a `'m map` that maps `'k key` to `('k, 'm) value`. This is especially useful when using [GADTs](https://v2.ocaml.org/manual/gadts-tutorial.html) for the type of keys. This is also sometimes called a dependent map. - Allows easy and fast operations across different types of maps and set (e.g. - an intersection between a map and a set), since all sets and maps, no matter their key type, are really positive integer sets or maps. + an intersection between a map and a set), since all sets and maps, no matter their key type, are really integer sets or maps. - Multiple choices for internal representation (`NODE`), which allows for efficient storage (no need to store a value for sets), or using weak nodes only (values removed from the tree if no other pointer to it exists). This system can also be extended to store size information in nodes if needed. @@ -294,7 +299,6 @@ These are smaller and closer to OCaml's built-in Map and Set, however: - These libraries work with older version of OCaml (`>= 4.05` I believe), whereas ours requires OCaml `>= 4.14` (for the new interface of `Ephemeron` used in `WeakNode`). -- Our keys are limited to positive integers. ### dmap diff --git a/index.mld b/index.mld index 44b9b4f..24d3e33 100644 --- a/index.mld +++ b/index.mld @@ -46,7 +46,7 @@ dune build @doc and from Patricia Tree with minimal effort.} {li The functor parameters ({!PatriciaTree.KEY} module) requires an injective [to_int : t -> int] function instead of a [compare] function. {!PatriciaTree.KEY.to_int} should be fast, - injective, and only return positive integers. + and injective. This works well with {{: https://en.wikipedia.org/wiki/Hash_consing}hash-consed} types.} {li The Patricia Tree representation is stable, contrary to maps, inserting nodes in any order will return the same shape. @@ -62,16 +62,23 @@ dune build @doc [idempotent_inter] for the efficient version and [nonidempotent_inter_no_share] for the general one)} {li Since our Patricia Tree use big-endian order on keys, the maps and sets are - sorted in increasing order of keys. We only support positive integer keys. + sorted in increasing {b {{!PatriciaTree.unsigned_lt}unsigned order}} of keys. + This means negative keys are sorted above positive keys, with [-1] being the + largest possible key, and [0] the smallest. This also avoids a bug in Okasaki's paper discussed in {{: https://www.cs.tufts.edu/comp/150FP/archive/jan-midtgaard/qc-patricia.pdf}{i QuickChecking Patricia Trees}} - by Jan Mitgaard.} + by Jan Mitgaard. + + It also affects functions like {{!PatriciaTree.BASE_MAP.min_binding}[min_binding]} + and {{!PatriciaTree.BASE_MAP.pop_minimum}[pop_minimum}. They will return the smallest + positive integer of both positive and negative keys are present; and not the smallest negative, + as one might expect.} {li Supports generic maps and sets: a ['m map] that maps ['k key] to [('k, 'm) value]. This is especially useful when using {{: https://v2.ocaml.org/manual/gadts-tutorial.html}GADTs} for the type of keys. This is also sometimes called a dependent map.} {li Allows easy and fast operations across different types of maps and set (e.g. an intersection between a map and a set), since all sets and maps, no matter their key type, - are really positive integer sets or maps.} + are really integer sets or maps.} {li Multiple choices for internal representation ({!PatriciaTree.NODE}), which allows for efficient storage (no need to store a value for sets), or using weak nodes only (values removed from the tree if no other pointer to it exists). This system can also be extended to store size information in nodes if needed.} @@ -293,7 +300,6 @@ These are smaller and closer to OCaml's built-in [Map] and [Set], however: - These libraries work with older version of OCaml ([>= 4.05] I believe), whereas ours requires OCaml [>= 4.14] (for the new interface of [Ephemeron] used in {!PatriciaTree.WeakNode}). -- Our keys are limited to positive integers. {2 dmap} diff --git a/patriciaTree.mli b/patriciaTree.mli index f59a303..a6ef55b 100644 --- a/patriciaTree.mli +++ b/patriciaTree.mli @@ -405,7 +405,7 @@ module type BASE_MAP = sig type ('map1, 'map2, 'map3) polymerge = { f : 'a. 'a key -> ('a, 'map1) value option -> ('a, 'map2) value option -> ('a, 'map3) value option; } [@@unboxed] val slow_merge : ('map1, 'map2, 'map3) polymerge -> 'map1 t -> 'map2 t -> 'map3 t - (** This is the same as {!Stdlib.Map.S.merge} *) + (** This is the same as {{: https://ocaml.org/manual/5.1/api/Map.S.html#VALmerge}Stdlib.Map.S.merge} *) val disjoint : 'a t -> 'a t -> bool (** [disjoint m1 m2] is [true] iff [m1] and [m2] have disjoint domains *) @@ -1115,7 +1115,12 @@ module type KEY = sig object. Correctness of the operations requires that different values in a tree correspond to different integers. - Must be injective, return only positive values, and ideally fast *) + Must be injective, and ideally fast. + + Note that since Patricia Trees use {{!unsigned_lt}unsigned order}, negative + keys are seen as bigger than positive keys. + Be wary of this when using negative keys combined with functions like + {{!BASE_MAP.max_binding}[max_binding]} and {{!BASE_MAP.pop_maximum}[pop_maximum]}. *) val to_int: t -> int end @@ -1136,7 +1141,12 @@ module type HETEROGENEOUS_KEY = sig object. Correctness of the operations requires that different values in a tree correspond to different integers. - Must be injective, return only positive values, and ideally fast *) + Must be injective, and ideally fast. + + Note that since Patricia Trees use {{!unsigned_lt}unsigned order}, negative + keys are seen as bigger than positive keys. + Be wary of this when using negative keys combined with functions like + {{!BASE_MAP.max_binding}[max_binding]} and {{!BASE_MAP.pop_maximum}[pop_maximum]}. *) val polyeq : 'a t -> 'b t -> ('a, 'b) cmp (** Polymorphic equality function used to compare our keys. From 2905c5aa4373111de365459bf6c31c673e2d8e6f Mon Sep 17 00:00:00 2001 From: Dorian Lesbre Date: Mon, 29 Apr 2024 13:59:11 +0200 Subject: [PATCH 09/14] Specify "unsigned" in names of min/max functions --- CHANGELOG.md | 8 +++++ README.md | 5 ++-- index.mld | 4 +-- patriciaTree.ml | 72 ++++++++++++++++++++++----------------------- patriciaTree.mli | 66 ++++++++++++++++++++--------------------- patriciaTreeTest.ml | 12 ++++---- 6 files changed, 87 insertions(+), 80 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 39b2719..61a7eac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Unreleased +- Patricia Tree now support using negative keys. Since the order used only depends + on the bitwise representation, negative keys appear bigger than any positive + keys. +- Renamed `min_binding`, `max_binding`, `pop_minimum`, `pop_maximum`, `min_elt` + and `max_elt` to `unsigned_min_binding`, `unsigned_max_binding`, + `pop_unsigned_minimum`, `pop_unsigned_maximum`, `unsigned_min_elt` + and `unsigned_max_elt` respectively, to clarify that these functions consider + negative numbers as larger than positive ones. - Fixed a bug where NodeWithId wasn't incrementing ids properly - `zarith` is no longer a dependency, used GCC's `__builtin_clz` as a faster method of finding an integer's highest bit. diff --git a/README.md b/README.md index ce0c9c9..11aee8f 100644 --- a/README.md +++ b/README.md @@ -82,9 +82,8 @@ dune build @doc This also avoids a bug in Okasaki's paper discussed in [*QuickChecking Patricia Trees*](https://www.cs.tufts.edu/comp/150FP/archive/jan-midtgaard/qc-patricia.pdf) by Jan Mitgaard. - It also affects functions like `min_binding` and `pop_minimum`. They will return the smallest - positive integer of both positive and negative keys are present; and not the smallest negative, - as one might expect. + It also affects functions like `unsigned_min_binding` and `pop_unsigned_minimum`. They will return the smallest + positive integer of both positive and negative keys are present; and not the smallest negative, as one might expect. - Supports generic maps and sets: a `'m map` that maps `'k key` to `('k, 'm) value`. This is especially useful when using [GADTs](https://v2.ocaml.org/manual/gadts-tutorial.html) for the type of keys. This is also sometimes called a dependent map. - Allows easy and fast operations across different types of maps and set (e.g. diff --git a/index.mld b/index.mld index 24d3e33..9e621c6 100644 --- a/index.mld +++ b/index.mld @@ -69,8 +69,8 @@ dune build @doc {{: https://www.cs.tufts.edu/comp/150FP/archive/jan-midtgaard/qc-patricia.pdf}{i QuickChecking Patricia Trees}} by Jan Mitgaard. - It also affects functions like {{!PatriciaTree.BASE_MAP.min_binding}[min_binding]} - and {{!PatriciaTree.BASE_MAP.pop_minimum}[pop_minimum}. They will return the smallest + It also affects functions like {{!PatriciaTree.BASE_MAP.unsigned_min_binding}[unsigned_min_binding]} + and {{!PatriciaTree.BASE_MAP.pop_unsigned_minimum}[pop_unsigned_minimum}. They will return the smallest positive integer of both positive and negative keys are present; and not the smallest negative, as one might expect.} {li Supports generic maps and sets: a ['m map] that maps ['k key] to [('k, 'm) value]. diff --git a/patriciaTree.ml b/patriciaTree.ml index 7a0c9e4..65621f1 100644 --- a/patriciaTree.ml +++ b/patriciaTree.ml @@ -58,8 +58,8 @@ module type BASE_MAP = sig type 'map key_value_pair = KeyValue : 'a key * ('a, 'map) value -> 'map key_value_pair - val min_binding : 'a t -> 'a key_value_pair - val max_binding : 'a t -> 'a key_value_pair + val unsigned_min_binding : 'a t -> 'a key_value_pair + val unsigned_max_binding : 'a t -> 'a key_value_pair val singleton : 'a key -> ('a, 'b) value -> 'b t val cardinal : 'a t -> int val is_singleton : 'a t -> 'a key_value_pair option @@ -67,8 +67,8 @@ module type BASE_MAP = sig val find_opt : 'key key -> 'map t -> ('key, 'map) value option val mem : 'key key -> 'map t -> bool val remove : 'key key -> 'map t -> 'map t - val pop_minimum: 'map t -> ('map key_value_pair * 'map t) option - val pop_maximum: 'map t -> ('map key_value_pair * 'map t) option + val pop_unsigned_minimum: 'map t -> ('map key_value_pair * 'map t) option + val pop_unsigned_maximum: 'map t -> ('map key_value_pair * 'map t) option val insert: 'a key -> (('a,'map) value option -> ('a,'map) value) -> 'map t -> 'map t val update: 'a key -> (('a,'map) value option -> ('a,'map) value option) -> 'map t -> 'map t @@ -186,10 +186,10 @@ module type HETEROGENEOUS_SET = sig val cardinal: t -> int val is_singleton: t -> any_elt option val remove: 'a elt -> t -> t - val min_elt: t -> any_elt - val max_elt: t -> any_elt - val pop_minimum: t -> (any_elt * t) option - val pop_maximum: t -> (any_elt * t) option + val unsigned_min_elt: t -> any_elt + val unsigned_max_elt: t -> any_elt + val pop_unsigned_minimum: t -> (any_elt * t) option + val pop_unsigned_maximum: t -> (any_elt * t) option val union: t -> t -> t val inter: t -> t -> t val disjoint: t -> t -> bool @@ -241,10 +241,10 @@ module type SET = sig val cardinal: t -> int val is_singleton: t -> elt option val remove: elt -> t -> t - val min_elt: t -> elt - val max_elt: t -> elt - val pop_minimum: t -> (elt * t) option - val pop_maximum: t -> (elt * t) option + val unsigned_min_elt: t -> elt + val unsigned_max_elt: t -> elt + val pop_unsigned_minimum: t -> (elt * t) option + val pop_unsigned_maximum: t -> (elt * t) option val iter: (elt -> unit) -> t -> unit val filter: (elt -> bool) -> t -> t val for_all: (elt -> bool) -> t -> bool @@ -282,8 +282,8 @@ module type MAP = sig val empty : 'a t val is_empty : 'a t -> bool - val min_binding : 'a t -> (key * 'a) - val max_binding : 'a t -> (key * 'a) + val unsigned_min_binding : 'a t -> (key * 'a) + val unsigned_max_binding : 'a t -> (key * 'a) val singleton : key -> 'a -> 'a t val cardinal : 'a t -> int val is_singleton : 'a t -> (key * 'a) option @@ -291,8 +291,8 @@ module type MAP = sig val find_opt : key -> 'a t -> 'a option val mem : key -> 'a t -> bool val remove : key -> 'a t -> 'a t - val pop_minimum : 'a t -> (key * 'a * 'a t) option - val pop_maximum : 'a t -> (key * 'a * 'a t) option + val pop_unsigned_minimum : 'a t -> (key * 'a * 'a t) option + val pop_unsigned_maximum : 'a t -> (key * 'a * 'a t) option val insert : key -> ('a option -> 'a) -> 'a t -> 'a t val update : key -> ('a option -> 'a option) -> 'a t -> 'a t val add : key -> 'a -> 'a t -> 'a t @@ -590,14 +590,14 @@ module MakeCustomHeterogeneous include NODE type 'map key_value_pair = KeyValue: 'a Key.t * ('a,'map) value -> 'map key_value_pair - let rec min_binding x = match NODE.view x with + let rec unsigned_min_binding x = match NODE.view x with | Empty -> raise Not_found | Leaf{key;value} -> KeyValue(key,value) - | Branch{tree0;_} -> min_binding tree0 - let rec max_binding x = match NODE.view x with + | Branch{tree0;_} -> unsigned_min_binding tree0 + let rec unsigned_max_binding x = match NODE.view x with | Empty -> raise Not_found | Leaf{key;value} -> KeyValue(key,value) - | Branch{tree1;_} -> max_binding tree1 + | Branch{tree1;_} -> unsigned_max_binding tree1 (* Merge trees whose prefix disagree. *) @@ -771,7 +771,7 @@ module MakeCustomHeterogeneous | Empty -> (* Can only happen in weak sets and maps. *) raise Disappeared - let pop_minimum m = match NODE.view m with + let pop_unsigned_minimum m = match NODE.view m with | Empty -> None | _ -> Some(pop_min_nonempty m) @@ -786,7 +786,7 @@ module MakeCustomHeterogeneous (* Can only happen in weak sets and maps. *) | Empty -> raise Disappeared - let pop_maximum m = match NODE.view m with + let pop_unsigned_maximum m = match NODE.view m with | Empty -> None | _ -> Some(pop_max_nonempty m) @@ -1398,11 +1398,11 @@ module MakeHeterogeneousSet(Key:HETEROGENEOUS_KEY) : HETEROGENEOUS_SET let f: type a. a key -> unit -> 'acc -> 'acc = fun k () acc -> f.f k acc in BaseMap.fold { f } set acc - let min_elt t = let KeyValue(m, ()) = BaseMap.min_binding t in Any m - let max_elt t = let KeyValue(m, ()) = BaseMap.max_binding t in Any m + let unsigned_min_elt t = let KeyValue(m, ()) = BaseMap.unsigned_min_binding t in Any m + let unsigned_max_elt t = let KeyValue(m, ()) = BaseMap.unsigned_max_binding t in Any m - let pop_maximum t = Option.map (fun (KeyValue(m,()),t) -> Any m,t) (BaseMap.pop_maximum t) - let pop_minimum t = Option.map (fun (KeyValue(m,()),t) -> Any m,t) (BaseMap.pop_minimum t) + let pop_unsigned_maximum t = Option.map (fun (KeyValue(m,()),t) -> Any m,t) (BaseMap.pop_unsigned_maximum t) + let pop_unsigned_minimum t = Option.map (fun (KeyValue(m,()),t) -> Any m,t) (BaseMap.pop_unsigned_minimum t) type polypretty = { f: 'a. Format.formatter -> 'a key -> unit; } [@@unboxed] let pretty ?pp_sep f fmt s = BaseMap.pretty ?pp_sep { f = fun fmt k () -> f.f fmt k} fmt s @@ -1481,16 +1481,16 @@ module MakeCustom let update k f m = update k (fun v -> snd_opt (f (opt_snd v))) m let add k v m = add k (Snd v) m let split x m = let (l,m,r) = split x m in (l, opt_snd m, r) - let min_binding m = let KeyValue(key,Snd value) = BaseMap.min_binding m in key,value - let max_binding m = let KeyValue(key,Snd value) = BaseMap.max_binding m in key,value + let unsigned_min_binding m = let KeyValue(key,Snd value) = BaseMap.unsigned_min_binding m in key,value + let unsigned_max_binding m = let KeyValue(key,Snd value) = BaseMap.unsigned_max_binding m in key,value (* let singleton k v = BaseMap.singleton (PolyKey.K k) v *) - let pop_minimum m = - match BaseMap.pop_minimum m with + let pop_unsigned_minimum m = + match BaseMap.pop_unsigned_minimum m with | None -> None | Some(KeyValue(key,Snd value),m) -> Some(key,value,m) - let pop_maximum m = - match BaseMap.pop_maximum m with + let pop_unsigned_maximum m = + match BaseMap.pop_unsigned_maximum m with | None -> None | Some(KeyValue(key,Snd value),m) -> Some(key,value,m) @@ -1585,10 +1585,10 @@ module MakeSet(Key: KEY) : SET with type elt = Key.t = struct | None -> None | Some(KeyValue(k,())) -> Some k - let min_elt t = let Any x = min_elt t in x - let max_elt t = let Any x = max_elt t in x - let pop_minimum t = Option.map (fun (Any x, t) -> (x,t)) (pop_minimum t) - let pop_maximum t = Option.map (fun (Any x, t) -> (x,t)) (pop_maximum t) + let unsigned_min_elt t = let Any x = unsigned_min_elt t in x + let unsigned_max_elt t = let Any x = unsigned_max_elt t in x + let pop_unsigned_minimum t = Option.map (fun (Any x, t) -> (x,t)) (pop_unsigned_minimum t) + let pop_unsigned_maximum t = Option.map (fun (Any x, t) -> (x,t)) (pop_unsigned_maximum t) let to_seq m = Seq.map (fun (BaseMap.KeyValue(elt,())) -> elt) (BaseMap.to_seq m) let to_rev_seq m = Seq.map (fun (BaseMap.KeyValue(elt,())) -> elt) (BaseMap.to_rev_seq m) diff --git a/patriciaTree.mli b/patriciaTree.mli index a6ef55b..6969312 100644 --- a/patriciaTree.mli +++ b/patriciaTree.mli @@ -48,7 +48,7 @@ notably (key,value) pairs or different types to be in the same map, or to choose the memory representation of the nodes of the tree. - - Some operations like [pop_minimum] and [pop_maximum] make our Set + - Some operations like [pop_unsigned_minimum] and [pop_unsigned_maximum] make our Set suitable as priority queue (but remember that each element in the queue must map to a distinct integer). *) @@ -190,13 +190,13 @@ module type BASE_MAP = sig (** {3 Basic functions} *) - val min_binding : 'a t -> 'a key_value_pair - (** [min_binding m] is minimal binding [KeyValue(k,v)] of the map, + val unsigned_min_binding : 'a t -> 'a key_value_pair + (** [unsigned_min_binding m] is minimal binding [KeyValue(k,v)] of the map, using the {{!unsigned_lt}unsigned order} on [Key.to_int]. @raises Not_found if the map is empty *) - val max_binding : 'a t -> 'a key_value_pair - (** [max_binding m] is maximal binding [KeyValue(k,v)] of the map, + val unsigned_max_binding : 'a t -> 'a key_value_pair + (** [unsigned_max_binding m] is maximal binding [KeyValue(k,v)] of the map, using the {{!unsigned_lt}unsigned order} on [Key.to_int]. @raises Not_found if the map is empty *) @@ -224,15 +224,15 @@ module type BASE_MAP = sig (** Returns a map with the element removed, O(log(n)) complexity. Returns a physically equal map if the element is absent. *) - val pop_minimum: 'map t -> ('map key_value_pair * 'map t) option - (** [pop_minimum m] returns [None] if [is_empty m], or [Some(key,value,m')] where - [(key,value) = min_binding m] and [m' = remove m key]. + val pop_unsigned_minimum: 'map t -> ('map key_value_pair * 'map t) option + (** [pop_unsigned_minimum m] returns [None] if [is_empty m], or [Some(key,value,m')] where + [(key,value) = unsigned_min_binding m] and [m' = remove m key]. Uses the {{!unsigned_lt}unsigned order} on [Key.to_int]. O(log(n)) complexity. *) - val pop_maximum: 'map t -> ('map key_value_pair * 'map t) option - (** [pop_maximum m] returns [None] if [is_empty m], or [Some(key,value,m')] where - [(key,value) = max_binding m] and [m' = remove m key]. + val pop_unsigned_maximum: 'map t -> ('map key_value_pair * 'map t) option + (** [pop_unsigned_maximum m] returns [None] if [is_empty m], or [Some(key,value,m')] where + [(key,value) = unsigned_max_binding m] and [m' = remove m key]. Uses the {{!unsigned_lt}unsigned order} on [Key.to_int]. O(log(n)) complexity. *) @@ -544,23 +544,23 @@ module type HETEROGENEOUS_SET = sig (** [remove elt set] returns a set containing all elements of [set] except [elt]. Returns a value physically equal to [set] if [elt] is not present. *) - val min_elt: t -> any_elt + val unsigned_min_elt: t -> any_elt (** The minimal element if non empty, according to the {{!unsigned_lt}unsigned order} on elements. @raises Not_found *) - val max_elt: t -> any_elt + val unsigned_max_elt: t -> any_elt (** The maximal element if non empty, according to the {{!unsigned_lt}unsigned order} on elements. @raises Not_found *) - val pop_minimum: t -> (any_elt * t) option - (** [pop_minimum s] is [Some (elt, s')] where [elt = min_elt s] and [s' = remove elt s] + val pop_unsigned_minimum: t -> (any_elt * t) option + (** [pop_unsigned_minimum s] is [Some (elt, s')] where [elt = unsigned_min_elt s] and [s' = remove elt s] if [s] is non empty. Uses the {{!unsigned_lt}unsigned order} on elements. *) - val pop_maximum: t -> (any_elt * t) option - (** [pop_maximum s] is [Some (elt, s')] where [elt = max_elt s] and [s' = remove elt s] + val pop_unsigned_maximum: t -> (any_elt * t) option + (** [pop_unsigned_maximum s] is [Some (elt, s')] where [elt = unsigned_max_elt s] and [s' = remove elt s] if [s] is non empty. Uses the {{!unsigned_lt}unsigned order} on elements. *) @@ -687,21 +687,21 @@ module type SET = sig (** [remove elt set] returns a set containing all elements of [set] except [elt]. Returns a value physically equal to [set] if [elt] is not present. *) - val min_elt: t -> elt + val unsigned_min_elt: t -> elt (** The minimal element (according to the {{!unsigned_lt}unsigned order} on [Key.to_int]) if non empty. @raises Not_found *) - val max_elt: t -> elt + val unsigned_max_elt: t -> elt (** The maximal element (according to the {{!unsigned_lt}unsigned order} on [Key.to_int]) if non empty. @raises Not_found *) - val pop_minimum: t -> (elt * t) option - (** [pop_minimum s] is [Some (elt, s')] where [elt = min_elt s] and [s' = remove elt s] + val pop_unsigned_minimum: t -> (elt * t) option + (** [pop_unsigned_minimum s] is [Some (elt, s')] where [elt = unsigned_min_elt s] and [s' = remove elt s] if [s] is non empty. Uses the {{!unsigned_lt}unsigned order} on [Key.to_int]. *) - val pop_maximum: t -> (elt * t) option - (** [pop_maximum s] is [Some (elt, s')] where [elt = max_elt s] and [s' = remove elt s] + val pop_unsigned_maximum: t -> (elt * t) option + (** [pop_unsigned_maximum s] is [Some (elt, s')] where [elt = unsigned_max_elt s] and [s' = remove elt s] if [s] is non empty. Uses the {{!unsigned_lt}unsigned order} on [Key.to_int]. *) @@ -806,12 +806,12 @@ module type MAP = sig val is_empty : 'a t -> bool (** Test if a map is empty; O(1) complexity. *) - val min_binding : 'a t -> (key * 'a) + val unsigned_min_binding : 'a t -> (key * 'a) (** Returns the (key,value) where [Key.to_int key] is minimal (in the {{!unsigned_lt}unsigned representation} of integers); O(log n) complexity. @raises Not_found if the map is empty *) - val max_binding : 'a t -> (key * 'a) + val unsigned_max_binding : 'a t -> (key * 'a) (** Returns the (key,value) where [Key.to_int key] is maximal (in the {{!unsigned_lt}unsigned representation} of integers); O(log n) complexity. @raises Not_found if the map is empty *) @@ -838,14 +838,14 @@ module type MAP = sig (** Returns a map with the element removed, O(log(n)) complexity. Returns a physically equal map if the element is absent. *) - val pop_minimum : 'a t -> (key * 'a * 'a t) option - (** [pop_minimum m] returns [None] if [is_empty m], or [Some(key,value,m')] where - [(key,value) = min_binding m] and [m' = remove m key]. O(log(n)) complexity. + val pop_unsigned_minimum : 'a t -> (key * 'a * 'a t) option + (** [pop_unsigned_minimum m] returns [None] if [is_empty m], or [Some(key,value,m')] where + [(key,value) = unsigned_min_binding m] and [m' = remove m key]. O(log(n)) complexity. Uses the {{!unsigned_lt}unsigned order} on [Key.to_int]. *) - val pop_maximum : 'a t -> (key * 'a * 'a t) option - (** [pop_maximum m] returns [None] if [is_empty m], or [Some(key,value,m')] where - [(key,value) = max_binding m] and [m' = remove m key]. O(log(n)) complexity. + val pop_unsigned_maximum : 'a t -> (key * 'a * 'a t) option + (** [pop_unsigned_maximum m] returns [None] if [is_empty m], or [Some(key,value,m')] where + [(key,value) = unsigned_max_binding m] and [m' = remove m key]. O(log(n)) complexity. Uses the {{!unsigned_lt}unsigned order} on [Key.to_int]. *) val insert : key -> ('a option -> 'a) -> 'a t -> 'a t @@ -1120,7 +1120,7 @@ module type KEY = sig Note that since Patricia Trees use {{!unsigned_lt}unsigned order}, negative keys are seen as bigger than positive keys. Be wary of this when using negative keys combined with functions like - {{!BASE_MAP.max_binding}[max_binding]} and {{!BASE_MAP.pop_maximum}[pop_maximum]}. *) + {{!BASE_MAP.unsigned_max_binding}[unsigned_max_binding]} and {{!BASE_MAP.pop_unsigned_maximum}[pop_unsigned_maximum]}. *) val to_int: t -> int end @@ -1146,7 +1146,7 @@ module type HETEROGENEOUS_KEY = sig Note that since Patricia Trees use {{!unsigned_lt}unsigned order}, negative keys are seen as bigger than positive keys. Be wary of this when using negative keys combined with functions like - {{!BASE_MAP.max_binding}[max_binding]} and {{!BASE_MAP.pop_maximum}[pop_maximum]}. *) + {{!BASE_MAP.unsigned_max_binding}[unsigned_max_binding]} and {{!BASE_MAP.pop_unsigned_maximum}[pop_unsigned_maximum]}. *) val polyeq : 'a t -> 'b t -> ('a, 'b) cmp (** Polymorphic equality function used to compare our keys. diff --git a/patriciaTreeTest.ml b/patriciaTreeTest.ml index d66d9ed..bba35cb 100644 --- a/patriciaTreeTest.ml +++ b/patriciaTreeTest.ml @@ -211,12 +211,12 @@ let%test_module _ = (module struct | None, _ | _, None -> None | Some a, Some b -> (f key a b)) m1 m2 - let pop_minimum m = + let pop_unsigned_minimum m = match M.min_binding m with | exception Not_found -> None | (key,value) -> Some(key,value,M.remove key m) - let pop_maximum m = + let pop_unsigned_maximum m = match M.max_binding m with | exception Not_found -> None | (key,value) -> Some(key,value,M.remove key m) @@ -288,22 +288,22 @@ let%test_module _ = (module struct module Foreign = MyMap.WithForeign(MyMap.BaseMap) - let test_pop_minimum = QCheck.Test.make ~count:1000 ~name:"pop_minimum" + let test_pop_minimum = QCheck.Test.make ~count:1000 ~name:"pop_unsigned_minimum" QCheck.(small_list (pair nat_gen nat_gen)) (fun x -> let m = extend_map MyMap.empty x in let model = intmap_of_mymap m in - match MyMap.pop_minimum m, IntMap.pop_minimum model with + match MyMap.pop_unsigned_minimum m, IntMap.pop_unsigned_minimum model with | None, Some _ | Some _, None -> false | None, None -> true | Some(key1,val1,m'), Some(key2,val2,model') -> key1 = key2 && val1 = val2 && IntMap.equal (=) (intmap_of_mymap m') model') let () = QCheck.Test.check_exn test_pop_minimum - let test_pop_maximum = QCheck.Test.make ~count:1000 ~name:"pop_maximum" + let test_pop_maximum = QCheck.Test.make ~count:1000 ~name:"pop_unsigned_maximum" QCheck.(small_list (pair nat_gen nat_gen)) (fun x -> let m = extend_map MyMap.empty x in let model = intmap_of_mymap m in - match MyMap.pop_maximum m, IntMap.pop_maximum model with + match MyMap.pop_unsigned_maximum m, IntMap.pop_unsigned_maximum model with | None, Some _ | Some _, None -> false | None, None -> true | Some(key1,val1,m'), Some(key2,val2,model') -> From daf2e68f3a8aa62a624885ec0fbca9b0a97d22d8 Mon Sep 17 00:00:00 2001 From: Dorian Lesbre Date: Mon, 29 Apr 2024 14:17:34 +0200 Subject: [PATCH 10/14] Test for highest bit --- patriciaTree.mli | 22 ++++++++++++++++++---- patriciaTreeTest.ml | 32 +++++++++++++++++++++++++------- 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/patriciaTree.mli b/patriciaTree.mli index 6969312..4232fb0 100644 --- a/patriciaTree.mli +++ b/patriciaTree.mli @@ -31,8 +31,9 @@ each key to be mapped to a unique integer identifier. - The implementation uses Patricia Tree, as described in Oksasaki - and Gill's 1998 paper "Fast mergeable integer maps", i.e. it is a - space-efficient prefix trie over the big-endian representation of + and Gill's 1998 paper + {{: https://www.semanticscholar.org/paper/Fast-Mergeable-Integer-Maps-Okasaki-Gill/23003be706e5f586f23dd7fa5b2a410cc91b659d}{i Fast mergeable integer maps}}, + i.e. it is a space-efficient prefix trie over the big-endian representation of the key's integer identifier. The main benefit of Patricia Tree is that their representation @@ -48,9 +49,12 @@ notably (key,value) pairs or different types to be in the same map, or to choose the memory representation of the nodes of the tree. - - Some operations like [pop_unsigned_minimum] and [pop_unsigned_maximum] make our Set + - Some operations like {{!BASE_MAP.pop_unsigned_minimum}[pop_unsigned_minimum]} and + {{!BASE_MAP.pop_unsigned_maximum}[pop_unsigned_maximum]} make our Set suitable as priority queue (but remember that each element in the - queue must map to a distinct integer). *) + queue must map to a distinct integer, and that using the {{!unsigned_lt}unsigned order} + means elements with negative priority are seen as greater than elements with + positive ones). *) (** Note on complexity: in the following, n represents the size of the map when there is one (and [|map1|] is the number of elements in @@ -90,6 +94,16 @@ val unsigned_lt : int -> int -> bool @since 0.10.0 *) +(**/**) + +val highest_bit : int -> (int[@untagged]) +(** [highest_bit x] is an integer with a single bit set: the highest set bit of [x]. + exported for test purposes only. + + @since 0.10.0 *) + +(**/**) + (** {1 Nodes} *) (** This module explains how a node is stored in memory, with diff --git a/patriciaTreeTest.ml b/patriciaTreeTest.ml index bba35cb..68bead3 100644 --- a/patriciaTreeTest.ml +++ b/patriciaTreeTest.ml @@ -21,6 +21,24 @@ open PatriciaTree +let check_highest_bit x res = + (* Printf.printf "CHECK_HIGHEST_BIT: %x %x\n%!" x res; *) + if (x = 0) + then (res = 0) + else begin + x != 0 && + (* The result is a single bit set. *) + res land (res-1) == 0 && + (* The bit x is set. *) + x land res = res && + (* It is the highest bit. *) + x land (lnot res) land (lnot (res - 1)) = 0 + end + +let () = QCheck.Test.check_exn @@ + QCheck.Test.make ~count:1000 ~name:"highest_bit" QCheck.int (fun x -> + check_highest_bit x (highest_bit x)) + let%test_module "TestHeterogeneous" = (module struct module MyKey = struct @@ -258,12 +276,12 @@ let%test_module _ = (module struct let third = extend_map first alist3 in (second,third) - let nat_gen = QCheck.int + let number_gen = QCheck.int let gen = QCheck.(triple - (small_list (pair nat_gen nat_gen)) - (small_list (pair nat_gen nat_gen)) - (small_list (pair nat_gen nat_gen)));; + (small_list (pair number_gen number_gen)) + (small_list (pair number_gen number_gen)) + (small_list (pair number_gen number_gen))) let model_from_gen x = let (m1,m2) = two_maps_from_three_lists x in @@ -289,7 +307,7 @@ let%test_module _ = (module struct module Foreign = MyMap.WithForeign(MyMap.BaseMap) let test_pop_minimum = QCheck.Test.make ~count:1000 ~name:"pop_unsigned_minimum" - QCheck.(small_list (pair nat_gen nat_gen)) (fun x -> + QCheck.(small_list (pair number_gen number_gen)) (fun x -> let m = extend_map MyMap.empty x in let model = intmap_of_mymap m in match MyMap.pop_unsigned_minimum m, IntMap.pop_unsigned_minimum model with @@ -300,7 +318,7 @@ let%test_module _ = (module struct let () = QCheck.Test.check_exn test_pop_minimum let test_pop_maximum = QCheck.Test.make ~count:1000 ~name:"pop_unsigned_maximum" - QCheck.(small_list (pair nat_gen nat_gen)) (fun x -> + QCheck.(small_list (pair number_gen number_gen)) (fun x -> let m = extend_map MyMap.empty x in let model = intmap_of_mymap m in match MyMap.pop_unsigned_maximum m, IntMap.pop_unsigned_maximum model with @@ -338,7 +356,7 @@ let%test_module _ = (module struct in f let test_map_filter = QCheck.Test.make ~count:1000 ~name:"map_filter" - QCheck.(small_list (pair nat_gen nat_gen)) (fun x -> + QCheck.(small_list (pair number_gen number_gen)) (fun x -> let m1 = extend_map MyMap.empty x in let model1 = intmap_of_mymap m1 in let chk_calls1 = check_increases () in From 09c339c957790ae757d28d1ea1f96c654d810ceb Mon Sep 17 00:00:00 2001 From: Dorian Lesbre Date: Fri, 26 Apr 2024 10:30:32 +0200 Subject: [PATCH 11/14] Don't upload artifact on pull request (cherry picked from commit 9e44c67af520853cfc03a04f0be1883f626f0943) --- .github/workflows/ocaml.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ocaml.yml b/.github/workflows/ocaml.yml index f19f282..ffa1867 100644 --- a/.github/workflows/ocaml.yml +++ b/.github/workflows/ocaml.yml @@ -62,6 +62,8 @@ jobs: run: echo "ref=$(echo ${GITHUB_REF#refs/*/})" >> ${GITHUB_OUTPUT} - uses: actions/upload-artifact@v4 + if: github.event_name == 'push' && + ( github.ref == 'refs/heads/main' || startsWith(github.ref,'refs/tags') ) with: name: doc-${{ steps.vars.outputs.ref }} path: _build/default/_doc/_html/patricia-tree/ From d40d47f2b818efa72f49e2eda0e8c351782b024c Mon Sep 17 00:00:00 2001 From: Dorian Lesbre Date: Mon, 29 Apr 2024 15:15:55 +0200 Subject: [PATCH 12/14] Slight change --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 61a7eac..9262dad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,8 @@ # Unreleased -- Patricia Tree now support using negative keys. Since the order used only depends - on the bitwise representation, negative keys appear bigger than any positive - keys. +- Patricia Tree now support using negative keys. Tree are built using the bitwise representation + of integer, meaning they effectively use an unsigned order. Negative keys are + considered bigger than positive keys, `0` is the minimal number and `-1` the maximal one. - Renamed `min_binding`, `max_binding`, `pop_minimum`, `pop_maximum`, `min_elt` and `max_elt` to `unsigned_min_binding`, `unsigned_max_binding`, `pop_unsigned_minimum`, `pop_unsigned_maximum`, `unsigned_min_elt` From 4806ca9baf4a7d8240839742b86dd0e5811bc44f Mon Sep 17 00:00:00 2001 From: Dorian Lesbre Date: Tue, 30 Apr 2024 16:42:23 +0200 Subject: [PATCH 13/14] test unsigned_lt --- patriciaTreeTest.ml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/patriciaTreeTest.ml b/patriciaTreeTest.ml index 68bead3..842bc3a 100644 --- a/patriciaTreeTest.ml +++ b/patriciaTreeTest.ml @@ -39,6 +39,18 @@ let () = QCheck.Test.check_exn @@ QCheck.Test.make ~count:1000 ~name:"highest_bit" QCheck.int (fun x -> check_highest_bit x (highest_bit x)) +let unsigned_lt_ref x y = + if x >= 0 && y >= 0 + then x < y + else if x >= 0 + then (* pos < neg *) true + else if y >= 0 then false + else x < y + +let () = QCheck.Test.check_exn @@ + QCheck.Test.make ~count:1000 ~name:"unsigned_lt" QCheck.(pair int int) (fun (x,y) -> + unsigned_lt x y = unsigned_lt_ref x y) + let%test_module "TestHeterogeneous" = (module struct module MyKey = struct From b63d36b549281d2b09f4d16510241d8e8de10c3f Mon Sep 17 00:00:00 2001 From: Dorian Lesbre Date: Tue, 30 Apr 2024 16:42:41 +0200 Subject: [PATCH 14/14] Fixed documentation comment --- README.md | 4 ++-- index.mld | 5 ++--- patriciaTree.mli | 8 ++++---- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 11aee8f..e006dd0 100644 --- a/README.md +++ b/README.md @@ -86,8 +86,8 @@ dune build @doc positive integer of both positive and negative keys are present; and not the smallest negative, as one might expect. - Supports generic maps and sets: a `'m map` that maps `'k key` to `('k, 'm) value`. This is especially useful when using [GADTs](https://v2.ocaml.org/manual/gadts-tutorial.html) for the type of keys. This is also sometimes called a dependent map. -- Allows easy and fast operations across different types of maps and set (e.g. - an intersection between a map and a set), since all sets and maps, no matter their key type, are really integer sets or maps. +- Allows easy and fast operations across different types of maps and set + which have the same type of keys (e.g. an intersection between a map and a set). - Multiple choices for internal representation (`NODE`), which allows for efficient storage (no need to store a value for sets), or using weak nodes only (values removed from the tree if no other pointer to it exists). This system can also be extended to store size information in nodes if needed. diff --git a/index.mld b/index.mld index 9e621c6..a2c8add 100644 --- a/index.mld +++ b/index.mld @@ -76,9 +76,8 @@ dune build @doc {li Supports generic maps and sets: a ['m map] that maps ['k key] to [('k, 'm) value]. This is especially useful when using {{: https://v2.ocaml.org/manual/gadts-tutorial.html}GADTs} for the type of keys. This is also sometimes called a dependent map.} -{li Allows easy and fast operations across different types of maps and set (e.g. - an intersection between a map and a set), since all sets and maps, no matter their key type, - are really integer sets or maps.} +{li Allows easy and fast operations across different types of maps and set + which have the same type of keys (e.g. an intersection between a map and a set).} {li Multiple choices for internal representation ({!PatriciaTree.NODE}), which allows for efficient storage (no need to store a value for sets), or using weak nodes only (values removed from the tree if no other pointer to it exists). This system can also be extended to store size information in nodes if needed.} diff --git a/patriciaTree.mli b/patriciaTree.mli index 4232fb0..2199992 100644 --- a/patriciaTree.mli +++ b/patriciaTree.mli @@ -467,7 +467,8 @@ module type HETEROGENEOUS_MAP = sig include BASE_MAP - (** Operation with maps/set of different types *) + (** Operation with maps/set of different types. + [Map2] must use the same [Key.to_int] function. *) module WithForeign(Map2:BASE_MAP with type 'a key = 'a key):sig type ('map1,'map2) polyinter_foreign = { f: 'a. 'a key -> ('a,'map1) value -> ('a,'map2) Map2.value -> ('a,'map1) value } [@@unboxed] @@ -1048,9 +1049,8 @@ module type MAP = sig val disjoint : 'a t -> 'a t -> bool - (* Maybe: WithForeign and WithForeignHeterogeneous. *) - - (** Combination with other kinds of maps. *) + (** Combination with other kinds of maps. + [Map2] must use the same [Key.to_int] function. *) module WithForeign(Map2 : BASE_MAP with type _ key = key):sig type ('b,'c) polyfilter_map_foreign = { f: 'a. key -> ('a,'b) Map2.value -> 'c option } [@@unboxed]