Skip to content

Commit

Permalink
Add eio backend for tar
Browse files Browse the repository at this point in the history
  • Loading branch information
patricoferris committed Jul 12, 2023
1 parent ec50b97 commit e8c7e4b
Show file tree
Hide file tree
Showing 5 changed files with 310 additions and 0 deletions.
15 changes: 15 additions & 0 deletions dune-project
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,18 @@
(tar-unix (and :with-test (= :version)))
)
)
(package
(name tar-eio)
(synopsis "Decode and encode tar format files using Eio")
(description
"\| tar is a library to read and write tar files with an emphasis on
"\| streaming. This library uses Eio to provide a portable tar library.
)
(tags ("org:xapi-project" "org:mirage"))
(depends
(ocaml (>= 4.08.0))
(eio (>= 0.10.0))
(tar (= :version))
)
)
4 changes: 4 additions & 0 deletions eio/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
(library
(name tar_eio)
(public_name tar-eio)
(libraries tar eio))
184 changes: 184 additions & 0 deletions eio/tar_eio.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
(*
* Copyright (C) 2006-2013 Citrix Systems Inc.
* Copyright (C) 2012 Thomas Gazagnaire <[email protected]>
* Copyright (C) 2023 Patrick Ferris <[email protected]>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*)

open Eio

module Monad = struct
type 'a t = 'a
let (>>=) a f = f a
let return = Fun.id
let return_unit = ()
end

module Reader = struct
type in_channel = Flow.source
type 'a t = 'a Monad.t
let really_read f b = Flow.read_exact f b |> Monad.return
let skip f (n: int) =
let open Monad in
let buffer_size = 32768 in
let buffer = Cstruct.create buffer_size in
let rec loop (n: int) =
if n <= 0 then Monad.return ()
else
let amount = min n buffer_size in
let block = Cstruct.sub buffer 0 amount in
really_read f block >>= fun () ->
loop (n - amount) in
loop n
end
let really_read = Reader.really_read

module Writer = struct
type out_channel = Flow.sink
type 'a t = 'a Monad.t
let really_write f b = Flow.write f [ b ] |> Monad.return
end
let really_write = Writer.really_write

let copy_n ifd ofd n =
let open Monad in
let block_size = 32768 in
let buffer = Cstruct.create block_size in
let rec loop remaining =
if remaining = 0L then Monad.return_unit else begin
let this = Int64.(to_int (min (of_int block_size) remaining)) in
let block = Cstruct.sub buffer 0 this in
really_read ifd block >>= fun () ->
really_write ofd block >>= fun () ->
loop (Int64.(sub remaining (of_int this)))
end in
loop n

module HR = Tar.HeaderReader(Monad)(Reader)
module HW = Tar.HeaderWriter(Monad)(Writer)

let get_next_header ?level ~global ic =
match HR.read ?level ~global (ic :> Flow.source) with
| Error `Eof -> Monad.return None
| Ok hdrs -> Monad.return (Some hdrs)

(* Eio needs a non-file-opening stat. *)
let stat path =
Eio.Path.with_open_in path @@ fun f ->
Eio.File.stat f

(** Return the header needed for a particular file on disk *)
let header_of_file ?level ?getpwuid ?getgrgid filepath : Tar.Header.t =
let level = match level with None -> Tar.Header.V7 | Some level -> level in
let stat = stat filepath in
let pwent = Option.map (fun f -> f stat.uid) getpwuid in
let grent = Option.map (fun f -> f stat.gid) getgrgid in
let uname = if level = V7 then Some "" else pwent in
let gname = if level = V7 then Some "" else grent in
let file_mode = stat.perm in
let user_id = stat.uid |> Int64.to_int in
let group_id = stat.gid |> Int64.to_int in
let file_size = stat.size |> Optint.Int63.to_int64 in
let mod_time = Int64.of_float stat.mtime in
let link_indicator = Tar.Header.Link.Normal in
let link_name = "" in
let devmajor = if level = Ustar then stat.dev |> Int64.to_int else 0 in
let devminor = if level = Ustar then stat.rdev |> Int64.to_int else 0 in
Monad.return (Tar.Header.make ~file_mode ~user_id ~group_id ~mod_time ~link_indicator ~link_name
?uname ?gname ~devmajor ~devminor (snd filepath) file_size)

let write_block ?level ?global (header: Tar.Header.t) (body: #Flow.sink -> unit) sink =
HW.write ?level ?global header (sink :> Flow.sink);
body sink;
really_write sink (Tar.Header.zero_padding header)

let write_end sink =
really_write sink Tar.Header.zero_block;
really_write sink Tar.Header.zero_block

(** Utility functions for operating over whole tar archives *)
module Archive = struct

(** Read the next header, apply the function 'f' to the fd and the header. The function
should leave the fd positioned immediately after the datablock. Finally the function
skips past the zero padding to the next header *)
let with_next_file src ~(global: Tar.Header.Extended.t option)
(f: Eio.Flow.source -> Tar.Header.Extended.t option -> Tar.Header.t -> 'a) =
match get_next_header ~global src with
| Some (hdr, global) ->
let result = f src global hdr in
Reader.skip src (Tar.Header.compute_zero_padding_length hdr);
Some result
| None ->
None

(** List the contents of a tar *)
let list ?level fd =
let rec loop global acc =
match get_next_header ?level ~global (fd :> Flow.source) with
| None -> Monad.return (List.rev acc)
| Some (hdr, global) ->
Reader.skip fd (Int64.to_int hdr.Tar.Header.file_size);
Reader.skip fd (Tar.Header.compute_zero_padding_length hdr);
loop global (hdr :: acc) in
loop None []

(** Extract the contents of a tar to directory 'dest' *)
let extract dest ifd =
let rec loop global () =
match get_next_header ~global ifd with
| None -> Monad.return_unit
| Some (hdr, global) ->
let filename = dest hdr.Tar.Header.file_name in
Eio.Path.(with_open_out ~create:(`Exclusive 0) filename) @@ fun ofd ->
copy_n ifd ofd hdr.Tar.Header.file_size;
Reader.skip ifd (Tar.Header.compute_zero_padding_length hdr);
loop global ()
in
loop None ()

let transform ?level f (ifd : #Flow.source) (ofd : #Flow.sink) =
let rec loop global () =
match get_next_header ~global ifd with
| None -> Monad.return_unit
| Some (header', global') ->
let header = f header' in
let body = fun _ -> copy_n ifd ofd header.Tar.Header.file_size in
write_block ?level ?global:(if global <> global' then global' else None) header body ofd;
Reader.skip ifd (Tar.Header.compute_zero_padding_length header');
loop global' ()
in
loop None ();
write_end ofd

(** Create a tar on file descriptor fd from the filename list
'files' *)
let create ?getpwuid ?getgrgid files ofd =
let file filename =
let stat = stat filename in
if stat.kind <> `Regular_file then
(* Skipping, not a regular file. *)
Monad.return_unit
else begin
let hdr = header_of_file ?getpwuid ?getgrgid filename in
write_block hdr (fun ofd ->
Eio.Path.with_open_in filename @@ fun ifd ->
copy_n ifd ofd hdr.Tar.Header.file_size
) ofd
end in
List.iter file files;
(* Add two empty blocks *)
write_end ofd

end
69 changes: 69 additions & 0 deletions eio/tar_eio.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
(*
* Copyright (C) 2006-2009 Citrix Systems Inc.
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*)

(** I/O for tar-formatted data *)

(** Returns the next header block or None if two consecutive
zero-filled blocks are discovered. Assumes stream is positioned at the
possible start of a header block.
@raise End_of_file if the stream unexpectedly fails. *)
val get_next_header : ?level:Tar.Header.compatibility -> global:Tar.Header.Extended.t option -> Eio.Flow.source ->
(Tar.Header.t * Tar.Header.Extended.t option) option

(** Return the header needed for a particular file on disk. [getpwuid] and [getgrgid] are optional
functions that should take the uid and gid respectively and return the passwd and group entry
names for each. These will be added to the header. *)
val header_of_file :
?level:Tar.Header.compatibility ->
?getpwuid:(int64 -> string) ->
?getgrgid:(int64 -> string) ->
Eio.Fs.dir Eio.Path.t ->
Tar.Header.t

module Archive : sig
(** Utility functions for operating over whole tar archives *)

(** Read the next header, apply the function 'f' to the source and the header. The function
should leave the source positioned immediately after the datablock. Finally the function
skips past the zero padding to the next header. *)
val with_next_file : Eio.Flow.source -> global:Tar.Header.Extended.t option ->
(Eio.Flow.source -> Tar.Header.Extended.t option -> Tar.Header.t -> 'a) -> 'a option

(** List the contents of a tar to stdout. *)
val list : ?level:Tar.Header.compatibility -> #Eio.Flow.source -> Tar.Header.t list

(** [extract dest] extract the contents of a tar.
Apply [dest] on each source filename to change the destination
filename. It only supports extracting regular files from the
top-level of the archive. *)
val extract : (string -> Eio.Fs.dir Eio.Path.t) -> Eio.Flow.source -> unit

(** [transform f src sink] applies [f] to the header of each
file in the tar inputted in [src], and writes the resulting
headers to [sink] preserving the content and structure of the
archive. *)
val transform : ?level:Tar.Header.compatibility -> (Tar.Header.t -> Tar.Header.t) -> #Eio.Flow.source -> #Eio.Flow.sink -> unit

(** Create a tar in the sink from a list of file paths. It only supports regular files.
See {! header_of_file} for the meaning of [getpwuid] and [getgrgid]. *)
val create :
?getpwuid:(int64 -> string) ->
?getgrgid:(int64 -> string) ->
Eio.Fs.dir Eio.Path.t list ->
#Eio.Flow.sink ->
unit
end
38 changes: 38 additions & 0 deletions tar-eio.opam
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# This file is generated by dune, edit dune-project instead
opam-version: "2.0"
synopsis: "Decode and encode tar format files using Eio"
description: """
tar is a library to read and write tar files with an emphasis on
streaming. This library uses Eio to provide a portable tar library.
"""
maintainer: ["Reynir Björnsson <[email protected]>" "[email protected]"]
authors: ["Dave Scott" "Thomas Gazagnaire" "David Allsopp" "Antonin Décimo"]
license: "ISC"
tags: ["org:xapi-project" "org:mirage"]
homepage: "https://github.com/mirage/ocaml-tar"
doc: "https://mirage.github.io/ocaml-tar/"
bug-reports: "https://github.com/mirage/ocaml-tar/issues"
depends: [
"dune" {>= "2.9"}
"ocaml" {>= "4.08.0"}
"eio" {>= "0.10.0"}
"tar" {= version}
"odoc" {with-doc}
]
build: [
["dune" "subst"] {dev}
[
"dune"
"build"
"-p"
name
"-j"
jobs
"--promote-install-files=false"
"@install"
"@runtest" {with-test}
"@doc" {with-doc}
]
["dune" "install" "-p" name "--create-install-files" name]
]
dev-repo: "git+https://github.com/mirage/ocaml-tar.git"

0 comments on commit e8c7e4b

Please sign in to comment.