Skip to content

Commit

Permalink
irmin-pack-tool: Add a tool for last indexed accessible commits
Browse files Browse the repository at this point in the history
  • Loading branch information
clecat committed Mar 17, 2023
1 parent 0ffa990 commit 2d091a6
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 1 deletion.
35 changes: 34 additions & 1 deletion src/irmin-pack-tools/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ This folder contains several tools meant to provide usefull ways to debug and du
Currently, there are the following tools:
- [`ppcf`](#ppcf), a json printer for control files
- [`ppidx`](#ppidx), a json printer for index folders
- [`last_accessible`](#last_accessible), gives the informations of the last accessible commit registered in index
- [`tezos-explorer`](#tezos-explorer), a gui for a fast exploration of tezos stores

## ppcf
Expand Down Expand Up @@ -53,7 +54,7 @@ It is fairly straightforward:
```shell
$ dune exec -- irmin-ppidx <path-to-store>
```
The second one is the path to the root of the store, not the index folder (e.g. `output/root/` where `output/root/index/` exists)
The first argument is the path to the root of the store, not the index folder (e.g. `output/root/` where `output/root/index/` exists)

It will typically give the following json output, with one line per entry registered:
```
Expand Down Expand Up @@ -95,5 +96,37 @@ However, it might be useful to sort the json output using the offset. You can do
$ jq -s 'sort_by(.off)' -- index
```

## last-valid
This tool searches the index for the commit with the biggest offset still within the size of the suffix files.
It should be usefull whenever someone wants to know to where they can safely revert to instead of reinstalling the whole store, in case of inconsistencies.
It is fairly straightforward:
```shell
$ dune exec -- irmin-last_accessible <path-to-store>
```
The first argument is the path to the root of the store, not the index folder (e.g. `output/root/` where `output/root/index/` exists)
Useful debugging informations can also be printed by setting the right verbosity using the `verbose` or`verbosity=...` flags.

It will typically give the following json output:
```
{"hash":"oJgRv9/bIHkNAcbG8TaElD7P/UwXMWL0lrTXbabCqdo=","off":97297154,"len":103,"kind":"Commit_v2"}
```
Which can be further improved using the tool `jq`:
```shell
$ dune exec -- irmin-last_accessible <path-to-store> | jq
```
Will give:
```json
{
"hash": "oJgRv9/bIHkNAcbG8TaElD7P/UwXMWL0lrTXbabCqdo=",
"off": 97297154,
"len": 103,
"kind": "Commit_v2"
}
```

Note that this tool makes several assumptions about the state of the store:
- The store is an upper store.
- The control file holds the right informations in the case of a gced store

## tezos-explorer
TODO
8 changes: 8 additions & 0 deletions src/irmin-pack-tools/last_accessible/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
(executable
(public_name irmin-last_accessible)
(package irmin-pack-tools)
(name last_accessible)
(modules last_accessible)
(libraries irmin-pack irmin-pack.unix cmdliner logs)
(preprocess
(pps ppx_irmin.internal)))
102 changes: 102 additions & 0 deletions src/irmin-pack-tools/last_accessible/last_accessible.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
module Hash = Irmin.Hash.SHA256
module Index = Irmin_pack_unix.Index.Make (Hash)
module Int63 = Optint.Int63
module Io = Irmin_pack_unix.Io.Unix
module Io_errors = Irmin_pack_unix.Io_errors.Make (Io)
module Upper_control = Irmin_pack_unix.Control_file.Upper (Io)

let src =
Logs.Src.create "irmin-pack-tools" ~doc:"irmin-pack-tools last accessible"

module Log = (val Logs.src_log src : Logs.LOG)

type t = { hash : string; off : Int63.t; len : int; kind : string }
[@@deriving irmin]

let get_payload root =
let control_file = Irmin_pack.Layout.V5.control ~root in
let r = Upper_control.read_payload ~path:control_file in
match r with Error err -> Io_errors.raise_error err | Ok payload -> payload

let get_suffixes_sizes root
(payload : Irmin_pack_unix.Control_file.Payload.Upper.V5.t) =
let r = ref 0 in
for i = payload.chunk_start_idx to payload.chunk_num do
let suffix = Irmin_pack.Layout.V5.suffix_chunk ~chunk_idx:i ~root in
let stats = Unix.stat suffix in
[%log.debug "found chunk at '%s' with size %d" suffix stats.st_size];
r := !r + stats.st_size
done;
[%log.debug "sum of found suffixes sizes: %d" !r];
!r

let get_status_infos (payload : Irmin_pack_unix.Control_file.Payload.Upper.V5.t)
=
match payload.status with
| Gced gced ->
[%log.debug
"store status: gced, found suffix start offset at %a and %a dead bytes"
Int63.pp gced.suffix_start_offset Int63.pp gced.suffix_dead_bytes];
( Int63.to_int gced.suffix_start_offset,
Int63.to_int gced.suffix_dead_bytes )
| _ -> (0, 0)

let get_last_accessible root off_max =
let r = ref { hash = "n/a"; off = Int63.zero; len = 0; kind = "n/a" } in
let off_max = Int63.of_int off_max in
let f k v =
let hash = Base64.encode_exn (Index.Key.encode k) in
let off, len, kind = v in
match (kind : Irmin_pack.Pack_value.Kind.t) with
| (Commit_v1 | Commit_v2) when off > off_max ->
[%log.warn
"found commit with offset %a in index (higher than maximum \
accessible offset)"
Int63.pp off]
| (Commit_v1 | Commit_v2) when Int63.add off (Int63.of_int len) > off_max ->
[%log.warn
"found commit with offset %a and length %d in index (too long to fit \
in the maximum accessible offset)"
Int63.pp off len]
| (Commit_v1 | Commit_v2) when off >= !r.off ->
let kind = Fmt.str "%a" Irmin_pack.Pack_value.Kind.pp kind in
r := { hash; off; len; kind }
| _ -> ()
in
let v = Index.v_exn ~readonly:true ~log_size:500_000 root in
Index.iter f v;
!r

let main root_folder () =
let payload = get_payload root_folder in
let sizes = get_suffixes_sizes root_folder payload in
let start_offset, dead_bytes = get_status_infos payload in
let off_max = sizes + start_offset - dead_bytes in
[%log.debug "last accessible offset: %d" off_max];
let last_accessible = get_last_accessible root_folder off_max in
Fmt.pr "%a@." (Irmin.Type.pp_json t) last_accessible

(** Cmdliner **)

open Cmdliner

let setup_log level =
Logs.set_level level;
Logs.set_reporter (Logs_fmt.reporter ())

let root_folder =
Arg.(
required
& pos 0 (some string) None
& info [] ~docv:"root folder" ~doc:"the path to the store")

let main_cmd =
let doc =
"gives the last accessible commit informations stored in the index of an \
upper store"
in
let info = Cmd.info "irmin-last_accessible" ~doc in
Cmd.v info
Term.(const main $ root_folder $ (const setup_log $ Logs_cli.level ()))

let () = exit (Cmd.eval ~catch:false main_cmd)

0 comments on commit 2d091a6

Please sign in to comment.