-
Notifications
You must be signed in to change notification settings - Fork 106
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Little bit faster BatList.cartesian_product #998
Conversation
this implementation avoids the call to List.concat as explained in issue ocaml-batteries-team#997
same than BatList.cartesian_product but avoids the calls to List.rev
Can you send a PR with just your version of BatList.cartesian_product? |
I think this is what I did. The *_iter and *_fold versions are in a different PR #999.
I also take quite a while between each steps, I can understand :) |
For the base |
Performance of using fold_right seems to be in the middle between let cartesian_product_right l1 l2 =
List.fold_right
(fun a acc ->
List.fold_right
(fun b acc -> (a, b)::acc)
l2 acc)
l1 []
Using 2 fold_left without Using 2 fold_right seems faster than 2 fold_left with the List.rev calls that preserves the same order of the output. |
Can you compare with this one:
|
You could use one of the several bench libraries for ocaml. |
Note: we have a |
I would use the following implementation, relying on Batteries' support for destination-passing-style. let cartesian_product l1 l2 =
match l1, l2 with
| [], _ | _, [] -> []
| _, _ ->
let dst = Acc.dummy () in
let _ =
List.fold_left (fun dst x1 ->
List.fold_left (fun dst x2 ->
Acc.accum dst (x1, x2)
) dst l2
) dst l1
in dst.tl I have not checked, but it should have performance comparable to @fccm's proposal, and preserve the output order. |
(* ocamlfind ocamlopt -linkpkg -package "unix,num,bigarray,str,bytes"
-I _build/src/ batteries.cmxa -o cp.opt cp.ml
*)
(* Thanks to Jacques Garrigue for suggesting the following structure *)
type 'a mut_list = {
hd: 'a;
mutable tl: 'a list
}
external inj : 'a mut_list -> 'a list = "%identity"
module Acc = struct
let dummy () =
{ hd = Obj.magic (); tl = [] }
let create x =
{ hd = x; tl = [] }
let accum acc x =
let cell = create x in
acc.tl <- inj cell;
cell
end
let cartesian_product_orig l1 l2 =
List.concat (List.map (fun a -> List.map (fun b -> (a, b)) l2) l1)
let cartesian_product_fold l1 l2 =
let l1 = List.rev l1 in
let l2 = List.rev l2 in
List.fold_left
(fun acc a ->
List.fold_left
(fun acc b -> (a, b)::acc)
acc l2)
[] l1
let cartesian_product_fold_rev l1 l2 =
List.fold_left
(fun acc a ->
List.fold_left
(fun acc b -> (a, b)::acc)
acc l2)
[] l1
let cartesian_product_dst l1 l2 =
match l1, l2 with
| [], _ | _, [] -> []
| _, _ ->
let dst = Acc.dummy () in
let _ =
List.fold_left (fun dst x1 ->
List.fold_left (fun dst x2 ->
Acc.accum dst (x1, x2)
) dst l2
) dst l1
in dst.tl
module A = BatArray
let cartesian_product_a_unsafe_get l1 l2 =
let a1 = A.of_list l1 in
let a2 = A.of_list l2 in
let res = ref [] in
let n = A.length a1 in
let m = A.length a2 in
for i = n - 1 downto 0 do
let a = A.unsafe_get a1 i in
for j = m - 1 downto 0 do
res := (a, A.unsafe_get a2 j) :: !res
done
done;
!res
let () =
let l1 = List.init 2_000 (fun i -> i) in
let l2 = List.init 4_000 (fun i -> i) in
let p =
match Sys.argv.(1) with
| "orig" -> cartesian_product_orig l1 l2
| "fold" -> cartesian_product_fold l1 l2
| "fold_rev" -> cartesian_product_fold_rev l1 l2
| "dst" -> cartesian_product_dst l1 l2
| "a" -> cartesian_product_a_unsafe_get l1 l2
| _ -> raise Exit
in
Printf.printf "len: %d\n%!" (List.length p);
;; |
When I run |
@fccm if you use a benchmarking library, you can have statistical results like variance and statistical significance of the observed change |
I agree with you. |
Under benchsuite/, there are several bench examples. |
closing because too old |
More details in this issue: #997
Also by avoiding the call to List.concat seems to make this function tail-rec, while the doc says that List.concat is not tail-rec.
You can test this PR with: