diff --git a/ocaml/tests/bench/bechamel_simple_cli.ml b/ocaml/tests/bench/bechamel_simple_cli.ml
index f2c24062f4b..8b4b1353ff7 100644
--- a/ocaml/tests/bench/bechamel_simple_cli.ml
+++ b/ocaml/tests/bench/bechamel_simple_cli.ml
@@ -1,28 +1,116 @@
 (* based on bechamel example code *)
 open Bechamel
 open Toolkit
+open Cmdliner
 
-let instances = Instance.[monotonic_clock; minor_allocated; major_allocated]
+let span =
+  let parse s =
+    try Ok (s |> Float.of_string |> Bechamel.Time.second)
+    with e -> Error (Printexc.to_string e)
+  in
+  let print ppf t =
+    Format.fprintf ppf "%fs"
+      ((t |> Bechamel.Time.span_to_uint64_ns |> Int64.to_float) *. 1e-9)
+  in
+  Arg.conv' ~docv:"SECONDS" (parse, print)
+
+let measures =
+  let witness =
+    Instance.
+      [
+        minor_allocated
+      ; major_allocated
+      ; promoted
+      ; compaction
+      ; minor_collection
+      ; major_collection
+      ; monotonic_clock
+      ]
+    (* cannot add the perf measures because they instantly fail when not root even when we're not using them,
+       just referencing their name causes the module to be initialized
+    *)
+    |> List.map (fun w -> (Measure.label w, w))
+    |> Arg.enum
+  in
+  Arg.(
+    value
+    & opt (list witness)
+        Instance.[monotonic_clock; minor_allocated; major_allocated]
+    & info ["measures"] ~doc:"What measurements to record" ~docv:"MEASURE"
+  )
 
 let cfg =
-  (* stabilize:true would be the default but it can cause function runtimes to be 10x the amount measured without.
-     It is also confusing for flamegraphs because the GC will show up much more frequently than in reality
-     due to the thousands of repeated calls.
-  *)
-  Benchmark.cfg
-    ~quota:Time.(second 5.0)
-    ~start:10 ~stabilize:false ~compaction:false ()
-
-let benchmark tests = Benchmark.all cfg instances tests
-
-let analyze raw_results =
-  let ols =
-    Analyze.ols ~r_square:true ~bootstrap:0 ~predictors:[|Measure.run|]
+  let limit =
+    Arg.(
+      value
+      & opt (some int) None
+      & info ["limit"] ~doc:"maximum number of samples allowed" ~docv:"SAMPLES"
+    )
+  and quota =
+    Arg.(
+      value
+      & opt span (Time.second 5.0)
+      & info ["quota"] ~doc:"maximum time allowed" ~docv:"SECONDS"
+    )
+  and kde =
+    Arg.(
+      value
+      & opt (some int) None
+      & info ["kde"]
+          ~doc:
+            "additional measurements for KDE and histogram display. Actual \
+             time limit will be 2*quota"
+    )
+  and stabilize =
+    Arg.(
+      value
+      & opt bool false
+      & info ["stabilize"]
+          ~doc:
+            "stabilize the GC before each run. Beware that although \
+             measurements will be more stable they may sometimes slow down \
+             even 10x the measured value."
+    )
+  and start =
+    Arg.(
+      value
+      & opt (some int) None
+      & info ["start"] ~doc:"the first value of the run metric"
+    )
+  and compaction =
+    Arg.(
+      value
+      & opt bool false
+      & info ["compaction"]
+          ~doc:"whether to enable GC compaction during the benchmark"
+    )
+  in
+  let cfg limit quota kde stabilize start compaction =
+    Benchmark.cfg ?limit ~quota ~kde ~stabilize ?start ~compaction ()
   in
+  Term.(const cfg $ limit $ quota $ kde $ stabilize $ start $ compaction)
+
+let analysis =
+  let bootstrap =
+    Arg.(
+      value
+      & opt int 0
+      & info ["bootstrap"]
+          ~doc:
+            "how many times to resample the measurements (needed for \
+             confidence interval calculations)"
+    )
+  in
+  let analyze bootstrap =
+    Analyze.ols ~r_square:true ~bootstrap ~predictors:[|Measure.run|]
+  in
+  Term.(const analyze $ bootstrap)
+
+let analyze (_, measures, analysis, _) raw_results =
   let results =
-    List.map (fun instance -> Analyze.all ols instance raw_results) instances
+    List.map (fun measure -> Analyze.all analysis measure raw_results) measures
   in
-  (Analyze.merge ols instances results, raw_results)
+  (Analyze.merge analysis measures results, raw_results)
 
 let img (window, results) =
   Bechamel_notty.Multiple.image_of_ols_results ~rect:window
@@ -30,29 +118,84 @@ let img (window, results) =
 
 open Notty_unix
 
-let show_results results =
+type output = Tty | Json | Text
+
+let dump_hashtbl name pp_elt =
+  Fmt.Dump.iter_bindings Hashtbl.iter Fmt.(any name) Fmt.string pp_elt
+
+let dump_measure ppf m =
+  Format.fprintf ppf "%s: %s" (Measure.label m) (Measure.unit m)
+
+let show_results ((_, measures, _, (output, json_file)) as t) results =
+  let results, raw_results = analyze t results in
   let () =
-    List.iter (fun i -> Bechamel_notty.Unit.add i (Measure.unit i)) instances
-  in
+    match output with
+    | Tty ->
+        let () =
+          List.iter
+            (fun i -> Bechamel_notty.Unit.add i (Measure.unit i))
+            measures
+        in
 
-  let results, _ = results |> analyze in
+        let window =
+          match winsize Unix.stdout with
+          | Some (w, h) ->
+              {Bechamel_notty.w; h}
+          | None ->
+              {Bechamel_notty.w= 80; h= 1}
+        in
+        img (window, results) |> eol |> output_image
+    | Json ->
+        ()
+    | Text ->
+        Format.printf "Measures: %a@." Fmt.Dump.(list dump_measure) measures ;
 
-  let window =
-    match winsize Unix.stdout with
-    | Some (w, h) ->
-        {Bechamel_notty.w; h}
-    | None ->
-        {Bechamel_notty.w= 80; h= 1}
+        Format.printf "%a@."
+          (dump_hashtbl "results" (dump_hashtbl "result" Analyze.OLS.pp))
+          results
   in
-  img (window, results) |> eol |> output_image
-
-let cli tests =
-  Format.printf "@,Running benchmarks@." ;
-  let results = tests |> benchmark in
-  show_results results
+  let file = open_out json_file in
+  let finally () = close_out file in
+  Fun.protect ~finally @@ fun () ->
+  let open Bechamel_js in
+  match
+    emit ~dst:(Channel file)
+      (fun _ -> Ok ())
+      ~compare ~x_label:Measure.run
+      ~y_label:(Measure.label Instance.monotonic_clock)
+      (results, raw_results)
+  with
+  | Ok () ->
+      Format.printf "Saved JSON results to %s@." json_file
+  | Error (`Msg err) ->
+      invalid_arg err
 
 let results = Hashtbl.create 47
 
+let benchmark_cli =
+  let open Cmdliner in
+  let output =
+    Arg.(
+      value
+      & opt (enum [("tty", Tty); ("json", Json); ("text", Text)]) Tty
+      & info ["output"]
+    )
+  in
+  let json_file =
+    let self = Sys.executable_name |> Filename.basename in
+    Arg.(
+      value
+      & opt string (self ^ ".json")
+      & info ["output-json"] ~doc:"JSON file to write results to"
+    )
+  in
+  let tuple a b c output json_file =
+    let t = (a, b, c, (output, json_file)) in
+    let () = at_exit (fun () -> show_results t results) in
+    t
+  in
+  Term.(const tuple $ cfg $ measures $ analysis $ output $ json_file)
+
 open Alcotest.V1
 
 let to_alcotest tests =
@@ -63,13 +206,12 @@ let to_alcotest tests =
     |> Test.elements
     |> List.map @@ fun test ->
        let name = Test.Elt.name test in
-       test_case name `Slow @@ fun () ->
-       Hashtbl.add results name @@ Benchmark.run cfg instances test
+       test_case name `Slow @@ fun (cfg, measures, _, _) ->
+       Hashtbl.add results name @@ Benchmark.run cfg measures test
   in
   (name, tests)
 
 let alcotest_cli tests =
   let name = Bechamel.Test.name tests in
   (* Alcotest exits when filtering is used even with 'and_exit:false', so setup an at_exit handler to show results *)
-  let () = at_exit (fun () -> show_results results) in
-  run ~show_errors:true name [to_alcotest tests]
+  run_with_args ~show_errors:true name benchmark_cli [to_alcotest tests]
diff --git a/ocaml/tests/bench/bench_concurrent.ml b/ocaml/tests/bench/bench_concurrent.ml
index 0ce2c91fd8c..7f770e777ef 100644
--- a/ocaml/tests/bench/bench_concurrent.ml
+++ b/ocaml/tests/bench/bench_concurrent.ml
@@ -484,7 +484,8 @@ let benchmarks =
           Pam.authorize pam "pamtest-edvint" "pamtest-edvint"
         )
     ; Test.make ~name:"overhead" (Staged.stage ignore)
-    ; Test.make ~name:"parallel_c_work" (Staged.stage Bench_concurrent_util.parallel_c_work)
+    ; Test.make ~name:"parallel_c_work"
+        (Staged.stage Bench_concurrent_util.parallel_c_work)
     ; (let module T = TestBarrier (BarrierPreloaded) in
       T.test
       )
diff --git a/ocaml/tests/bench/bench_concurrent_util.ml b/ocaml/tests/bench/bench_concurrent_util.ml
index 1978ce143e3..28c10b24229 100644
--- a/ocaml/tests/bench/bench_concurrent_util.ml
+++ b/ocaml/tests/bench/bench_concurrent_util.ml
@@ -20,12 +20,12 @@ let parallel_c_work =
   fun ms ->
   let (_:int) = Sys.opaque_identity (bench_fixed_work (ms * n)) in ()*)
 
-(* 
-  So this will change with different compiler versions or CPU architectures,
-  but will be fixed on a given system.
+(*
+   So this will change with different compiler versions or CPU architectures,
+   but will be fixed on a given system.
 *)
 let parallel_c_work () =
-  let (_:int) = Sys.opaque_identity @@ bench_fixed_work @@ 4_000_000 in
+  let (_ : int) = Sys.opaque_identity @@ bench_fixed_work @@ 4_000_000 in
   ()
 
 let semaphore () = Semaphore.Binary.make false
diff --git a/ocaml/tests/bench/dune b/ocaml/tests/bench/dune
index 5407c2bb2a9..b5c3ae70552 100644
--- a/ocaml/tests/bench/dune
+++ b/ocaml/tests/bench/dune
@@ -1,7 +1,7 @@
 (library
  (name bechamel_simple_cli)
  (modules bechamel_simple_cli bench_concurrent_util)
- (libraries bechamel bechamel-notty bechamel.monotonic_clock notty.unix threads.posix alcotest)
+ (libraries bechamel bechamel-js bechamel-notty bechamel.monotonic_clock notty.unix threads.posix alcotest)
  (foreign_stubs
   (language c)
   (names bench_concurrent_stubs)
@@ -39,4 +39,4 @@
  (modes exe)
  (optional)
  (libraries pam mtime mtime.clock.os)
-)
+)
\ No newline at end of file
diff --git a/ocaml/tests/bench/scale_auth.ml b/ocaml/tests/bench/scale_auth.ml
index ad53ad7dcff..1252155c38a 100644
--- a/ocaml/tests/bench/scale_auth.ml
+++ b/ocaml/tests/bench/scale_auth.ml
@@ -62,9 +62,11 @@ let () =
       let handle = Pam.authenticate_start () in
       (* warm up PAM, there is a sleep(1) inside otherwise on init race condition *)
       let () = Pam.authorize handle !user !password in
-      if !warmup_close then begin Pam.authenticate_stop handle; None end
-      else
-      Some handle
+      if !warmup_close then (
+        Pam.authenticate_stop handle ;
+        None
+      ) else
+        Some handle
     else
       None
   in