Skip to content

Commit

Permalink
Merge pull request #146 from mirage/new-tar-gz
Browse files Browse the repository at this point in the history
New tar gz
  • Loading branch information
dinosaure committed Aug 4, 2024
2 parents 3bad2c2 + 4215ff0 commit ffdd7e9
Show file tree
Hide file tree
Showing 18 changed files with 1,623 additions and 1,048 deletions.
2 changes: 1 addition & 1 deletion bin/dune
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
(name otar)
(public_name otar)
(package tar-unix)
(libraries unix tar.gz))
(libraries unix tar.gz tar_unix))
150 changes: 74 additions & 76 deletions bin/otar.ml
Original file line number Diff line number Diff line change
Expand Up @@ -16,75 +16,72 @@

let () = Printexc.record_backtrace true

module Tar_gz = Tar_gz.Make
(struct type 'a t = 'a
let ( >>= ) x f = f x
let return x = x end)
(struct type out_channel = Stdlib.out_channel
type 'a io = 'a
let really_write oc str =
output_string oc str end)
(struct type in_channel = Stdlib.in_channel
type 'a io = 'a
let read ic buf =
input ic buf 0 (Bytes.length buf)
end)


let ( / ) = Filename.concat

let stream_of_fd fd =
let buf = Bytes.create 0x1000 in
fun () -> match Unix.read fd buf 0 (Bytes.length buf) with
| 0 -> None
| len ->
let str = Bytes.sub_string buf 0 len in
Some str
| exception End_of_file -> None
let contents_of_path path =
let fd = ref `None in
let buf = Bytes.create 0x100 in
let rec dispenser () = match !fd with
| `Closed -> Tar.return (Ok None)
| `None ->
let fd' = Unix.openfile path Unix.[ O_RDONLY; O_CLOEXEC ] 0o644 in
fd := `Active fd';
dispenser ()
| `Active fd' ->
match Unix.read fd' buf 0 (Bytes.length buf) with
| 0 | exception End_of_file ->
Unix.close fd'; fd := `Closed; Tar.return (Ok None)
| len ->
let str = Bytes.sub_string buf 0 len in
Tar.return (Ok (Some str)) in
dispenser

let always x = fun _ -> x
let to_stream lst =
let lst = ref lst in
fun () -> match !lst with
| [] -> None
| x :: r -> lst := r; Some x

let create_tarball directory oc =
let create_tarball directory fd =
let files = Sys.readdir directory in
let os = match Sys.os_type with
| "Win32" -> Gz.NTFS (* XXX(dinosaure): true? *)
| "Unix" | "Cygwin" | _ -> Gz.Unix in
let mtime = Unix.gettimeofday () in
let out_channel = Tar_gz.of_out_channel ~level:4 ~mtime:(Int32.of_float mtime) os oc in
let hdr = Tar.Header.make ~file_mode:0o755
~mod_time:(Int64.of_float mtime) (Filename.concat directory "") 0L in
(match Tar_gz.write_block ~level:Tar.Header.Ustar hdr out_channel (always None) with
| Ok () -> ()
| Error `Msg msg -> Format.eprintf "Error %s writing block\n%!" msg);
Array.iter begin fun filename ->
let fd = Unix.openfile (directory / filename) Unix.[ O_RDONLY; O_CLOEXEC ] 0o644 in
let stat = Unix.LargeFile.lstat (directory / filename) in
match stat.st_kind with
| Unix.S_REG ->
let stream = stream_of_fd fd in
let file_mode = if stat.Unix.LargeFile.st_perm land 0o111 <> 0 then 0o755 else 0o644 in
let mod_time = Int64.of_float stat.Unix.LargeFile.st_mtime in
let user_id = stat.Unix.LargeFile.st_uid in
let group_id = stat.Unix.LargeFile.st_gid in
let hdr = Tar.Header.make
let dir_hdr = Tar.Header.make ~file_mode:0o755
~mod_time:(Int64.of_float mtime) (Filename.concat directory "") 0L in
let dir_entry = (None, dir_hdr, (fun () -> Tar.return (Ok None))) in
let entries = Array.fold_left begin fun acc filename ->
let stat = Unix.LargeFile.stat (directory / filename) in
match stat.st_kind with
| Unix.S_REG ->
let file_mode = if stat.st_perm land 0o111 <> 0 then 0o755 else 0o644 in
let mod_time = Int64.of_float stat.Unix.LargeFile.st_mtime in
let user_id = stat.st_uid in
let group_id = stat.st_gid in
let level = Some Tar.Header.Ustar in
let hdr = Tar.Header.make
~file_mode ~mod_time ~user_id ~group_id
(directory / filename) stat.Unix.LargeFile.st_size in
(match Tar_gz.write_block ~level:Tar.Header.Ustar hdr out_channel stream with
| Ok () -> ()
| Error `Msg msg -> Format.eprintf "Error %s writing block\n%!" msg);
Unix.close fd ;
| _ ->
Format.eprintf "Skipping non-regular file %s\n" (Filename.concat directory filename)
end files ;
Tar_gz.write_end out_channel
(directory / filename) stat.st_size in
(level, hdr, contents_of_path (directory / filename)) :: acc
| _ -> acc end [] files in
let entries = to_stream (dir_entry :: entries) in
let entries () = Tar.return (Ok (entries ())) in
let t = Tar.out ~level:Tar.Header.Ustar entries in
let t = Tar_gz.out_gzipped ~level:4 ~mtime:(Int32.of_float mtime) os t in
match Tar_unix.run t fd with
| Ok () -> ()
| Error err ->
Format.eprintf "%s: %a\n%!" Sys.executable_name Tar_unix.pp_error err

let make directory oc =
let oc, oc_close, _gz = match oc with
| None -> stdout, ignore, false
let fd, fd_close = match oc with
| None -> Unix.stdout, ignore
| Some filename ->
let oc = open_out filename in
oc, (fun () -> close_out oc), Filename.extension filename = ".gz" in
create_tarball directory oc ; oc_close ()
let fd = Unix.openfile filename Unix.[ O_TRUNC; O_CREAT; O_WRONLY; O_CLOEXEC ] 0o644 in
fd, (fun () -> Unix.close fd) in
Fun.protect ~finally:fd_close @@ fun () ->
create_tarball directory fd

let sizes = [| "B"; "KiB"; "MiB"; "GiB"; "TiB"; "PiB"; "EiB"; "ZiB"; "YiB" |]

Expand All @@ -97,27 +94,28 @@ let bytes_to_size ?(decimals = 2) ppf = function
Format.fprintf ppf "%.*f %s" decimals r sizes.(int_of_float i)

let list filename =
let ic = open_in filename in
let ic = Tar_gz.of_in_channel ~internal:(De.bigstring_create 0x1000) ic in
let rec go global () = match Tar_gz.HeaderReader.read ~global ic with
| Ok (hdr, global) ->
Format.printf "%s (%s, %a)\n%!"
hdr.Tar.Header.file_name
(Tar.Header.Link.to_string hdr.link_indicator)
(bytes_to_size ~decimals:2) hdr.Tar.Header.file_size ;
(* Alternatively:
let padding = Tar.Header.compute_zero_padding_length hdr in
let data = Int64.to_int hdr.Tar.Header.file_size in
let to_skip = data + padding in *)
let to_skip = Tar.Header.(Int64.to_int (to_sectors hdr) * length) in
Tar_gz.skip ic to_skip ;
go global ()
| Error `Eof -> ()
| Error `Fatal e ->
Format.eprintf "Error listing archive: %a\n%!" Tar.pp_error e;
exit 2
let go ?global:_ hdr () =
Format.printf "%s (%s, %a)\n%!"
hdr.Tar.Header.file_name
(Tar.Header.Link.to_string hdr.link_indicator)
(bytes_to_size ~decimals:2) hdr.Tar.Header.file_size ;
let open Tar in
let* _ = seek (Int64.to_int hdr.Tar.Header.file_size) in
return (Ok ())
in
go None ()
let fd = Unix.openfile filename [ Unix.O_RDONLY ] 0 in
match Tar_unix.run (Tar_gz.in_gzipped (Tar.fold go ())) fd with
| Ok () -> ()
| Error (`Unix _) ->
Format.eprintf "Some UNIX error occurred.\n%!"
| Error (`Msg e) ->
Format.eprintf "Some error: %s.\n%!" e
| Error (`Unexpected_end_of_file | `Eof) ->
Format.eprintf "Unexpected end of file.\n%!"
| Error `Gz err ->
Format.eprintf "Some Gzip error occurred: %s.\n%!" err
| Error (`Fatal _) ->
Format.eprintf "Some fatal error occurred.\n%!"

let () = match Sys.argv with
| [| _; "list"; filename; |] when Sys.file_exists filename ->
Expand Down
2 changes: 1 addition & 1 deletion dune-project
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@
(tags ("org:xapi-project" "org:mirage"))
(depends
(ocaml (>= 5.00.0))
(eio (and (>= 0.10.0) (< 0.12)))
(eio (and (>= 1.1) (< 1.2)))
(tar (= :version))
)
)
109 changes: 78 additions & 31 deletions eio/tar_eio.ml
Original file line number Diff line number Diff line change
Expand Up @@ -18,42 +18,64 @@

open Eio

module Monad = struct
type 'a t = 'a
let (>>=) a f = f a
let return = Fun.id
end
module High : sig
type t
type 'a s = 'a

module Io = struct
type in_channel = Flow.source
type 'a io = 'a
let really_read f b =
let len = Bytes.length b in
let cs = Cstruct.create len in
Flow.read_exact f cs;
Cstruct.blit_to_bytes cs 0 b 0 len
let skip f (n: int) =
let buffer_size = 32768 in
let buffer = Cstruct.create buffer_size in
let rec loop (n: int) =
if n <= 0 then ()
else
let amount = min n buffer_size in
let block = Cstruct.sub buffer 0 amount in
Flow.read_exact f block;
loop (n - amount) in
loop n
external inj : 'a s -> ('a, t) Tar.io = "%identity"
external prj : ('a, t) Tar.io -> 'a s = "%identity"
end = struct
type t
type 'a s = 'a

type out_channel = Flow.sink
let really_write f str = Flow.write f [ Cstruct.of_string str ]
external inj : 'a -> 'b = "%identity"
external prj : 'b -> 'a = "%identity"
end

let really_read = Flow.read_exact
let skip = Io.skip
let really_write f b = Flow.write f [ b ]
type t = High.t

let value v = Tar.High (High.inj v)

module HeaderReader = Tar.HeaderReader(Monad)(Io)
module HeaderWriter = Tar.HeaderWriter(Monad)(Io)
let run_read_only t f =
let rec run : type a. (a, 'err, t) Tar.t -> (a, 'err) result = function
| Tar.Write _ -> assert false
| Tar.Read len ->
let b = Cstruct.create len in
(match Flow.single_read f b with
| len ->
Ok (Cstruct.to_string ~len b)
| exception End_of_file ->
(* XXX: should we catch other exceptions?! *)
Error `Unexpected_end_of_file)
| Tar.Really_read len ->
let b = Cstruct.create len in
(try
Flow.read_exact f b;
Ok (Cstruct.to_string b)
with End_of_file -> Error `Unexpected_end_of_file)
| Tar.Seek n ->
let buffer_size = 32768 in
let buffer = Cstruct.create buffer_size in
let rec loop (n: int) =
if n <= 0 then Ok ()
else
let amount = min n buffer_size in
let block = Cstruct.sub buffer 0 amount in
Flow.read_exact f block;
loop (n - amount) in
loop n
| Tar.Return value -> value
| Tar.High value -> High.prj value
| Tar.Bind (x, f) ->
match run x with
| Ok value -> run (f value)
| Error _ as err -> err in
run t

let fold f filename init =
(* XXX(reynir): ??? *)
Eio.Path.with_open_in filename
(run_read_only (Tar.fold f init))

(* Eio needs a non-file-opening stat. *)
let stat path =
Expand All @@ -79,3 +101,28 @@ let header_of_file ?level ?getpwuid ?getgrgid filepath : Tar.Header.t =
let devminor = if level = Ustar then stat.rdev |> Int64.to_int else 0 in
Tar.Header.make ~file_mode ~user_id ~group_id ~mod_time ~link_indicator ~link_name
?uname ?gname ~devmajor ~devminor (snd filepath) file_size
let extract ?filter:(_ = fun _ -> true) ~src:_ _dst =
(* TODO *)
failwith "TODO"

let create ?level:_ ?global:_ ?filter:(_ = fun _ -> true) ~src:_ _dst =
(* TODO *)
failwith "TODO"

let append_file ?level:_ ?header:_ _filename _dst =
(* TODO *)
failwith "TODO"

let write_header ?level:_ _hdr _fl =
(* TODO *)
failwith "TODO"

let write_global_extended_header ?level:_ _global _fl =
(* TODO *)
failwith "TODO"

let write_end fl =
let zero_block = Cstruct.of_string Tar.Header.zero_block in
(* TODO: catch exceptions?! *)
Eio.Flow.write fl [ zero_block; zero_block ];
Ok ()
52 changes: 40 additions & 12 deletions eio/tar_eio.mli
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,20 @@

(** I/O for tar-formatted data *)

val really_read: Eio.Flow.source -> Cstruct.t -> unit
(** [really_read fd buf] fills [buf] with data from [fd] or fails
with {!Stdlib.End_of_file}. *)
type t

val really_write: Eio.Flow.sink -> Cstruct.t -> unit
(** [really_write fd buf] writes the full contents of [buf] to
[fd] or fails with {!Stdlib.End_of_file}. *)
val value : ('a, 'err) result -> ('a, 'err, t) Tar.t

val skip : Eio.Flow.source -> int -> unit
(** [skip fd n] reads [n] bytes from [fd] and discards them. If possible, you
should use [Lwt_unix.lseek fd n Lwt_unix.SEEK_CUR] instead. *)
val run_read_only : ('a, [> `Unexpected_end_of_file] as 'b, t) Tar.t -> [> `R ] Eio.Flow.source -> ('a, 'b) result

val fold :
(?global:Tar.Header.Extended.t ->
Tar.Header.t ->
'a ->
('a, [> `Fatal of Tar.error | `Unexpected_end_of_file ] as 'b, t) Tar.t) ->
Eio.Fs.dir_ty Eio.Path.t ->
'a ->
('a, 'b) result

(** Return the header needed for a particular file on disk. [getpwuid] and [getgrgid] are optional
functions that should take the uid and gid respectively and return the passwd and group entry
Expand All @@ -35,8 +38,33 @@ val header_of_file :
?level:Tar.Header.compatibility ->
?getpwuid:(int64 -> string) ->
?getgrgid:(int64 -> string) ->
Eio.Fs.dir Eio.Path.t ->
Eio.Fs.dir_ty Eio.Path.t ->
Tar.Header.t

module HeaderReader : Tar.HEADERREADER with type in_channel = Eio.Flow.source and type 'a io = 'a
module HeaderWriter : Tar.HEADERWRITER with type out_channel = Eio.Flow.sink and type 'a io = 'a
val extract : ?filter:(Tar.Header.t -> bool) ->
src:Eio.Fs.dir_ty Eio.Path.t ->
Eio.Fs.dir_ty Eio.Path.t ->
(unit, _) result

val create : ?level:Tar.Header.compatibility ->
?global:Tar.Header.Extended.t ->
?filter:(Tar.Header.t -> bool) ->
src:Eio.Fs.dir_ty Eio.Path.t ->
Eio.Fs.dir_ty Eio.Path.t ->
(unit, _) result

val append_file : ?level:Tar.Header.compatibility ->
?header:Tar.Header.t ->
Eio.Fs.dir_ty Eio.Path.t ->
[> `W ] Eio.Flow.sink ->
(unit, _) result

val write_header : ?level:Tar.Header.compatibility ->
Tar.Header.t -> [> `W ] Eio.Flow.sink ->
(unit, _) result

val write_global_extended_header : ?level:Tar.Header.compatibility ->
Tar.Header.Extended.t -> [> `W ] Eio.Flow.sink ->
(unit, _) result

val write_end : [> `W ] Eio.Flow.sink -> (unit, _) result
Loading

0 comments on commit ffdd7e9

Please sign in to comment.