diff --git a/dune-project b/dune-project index 2286445..a3f6da8 100644 --- a/dune-project +++ b/dune-project @@ -80,3 +80,18 @@ (tar-unix (and :with-test (= :version))) ) ) + +(package + (name tar-eio) + (synopsis "Decode and encode tar format files using Eio") + (description +"\| tar is a library to read and write tar files with an emphasis on +"\| streaming. This library uses Eio to provide a portable tar library. + ) + (tags ("org:xapi-project" "org:mirage")) + (depends + (ocaml (>= 4.08.0)) + (eio (>= 0.10.0)) + (tar (= :version)) + ) +) diff --git a/eio/dune b/eio/dune new file mode 100644 index 0000000..0ec6d2e --- /dev/null +++ b/eio/dune @@ -0,0 +1,4 @@ +(library + (name tar_eio) + (public_name tar-eio) + (libraries tar eio)) diff --git a/eio/tar_eio.ml b/eio/tar_eio.ml new file mode 100644 index 0000000..ecf391b --- /dev/null +++ b/eio/tar_eio.ml @@ -0,0 +1,184 @@ +(* + * Copyright (C) 2006-2013 Citrix Systems Inc. + * Copyright (C) 2012 Thomas Gazagnaire + * Copyright (C) 2023 Patrick Ferris + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + *) + +open Eio + +module Monad = struct + type 'a t = 'a + let (>>=) a f = f a + let return = Fun.id + let return_unit = () +end + +module Reader = struct + type in_channel = Flow.source + type 'a t = 'a Monad.t + let really_read f b = Flow.read_exact f b |> Monad.return + let skip f (n: int) = + let open Monad in + let buffer_size = 32768 in + let buffer = Cstruct.create buffer_size in + let rec loop (n: int) = + if n <= 0 then Monad.return () + else + let amount = min n buffer_size in + let block = Cstruct.sub buffer 0 amount in + really_read f block >>= fun () -> + loop (n - amount) in + loop n +end +let really_read = Reader.really_read + +module Writer = struct + type out_channel = Flow.sink + type 'a t = 'a Monad.t + let really_write f b = Flow.write f [ b ] |> Monad.return +end +let really_write = Writer.really_write + +let copy_n ifd ofd n = + let open Monad in + let block_size = 32768 in + let buffer = Cstruct.create block_size in + let rec loop remaining = + if remaining = 0L then Monad.return_unit else begin + let this = Int64.(to_int (min (of_int block_size) remaining)) in + let block = Cstruct.sub buffer 0 this in + really_read ifd block >>= fun () -> + really_write ofd block >>= fun () -> + loop (Int64.(sub remaining (of_int this))) + end in + loop n + +module HR = Tar.HeaderReader(Monad)(Reader) +module HW = Tar.HeaderWriter(Monad)(Writer) + +let get_next_header ?level ~global ic = + match HR.read ?level ~global (ic :> Flow.source) with + | Error `Eof -> Monad.return None + | Ok hdrs -> Monad.return (Some hdrs) + +(* Eio needs a non-file-opening stat. *) +let stat path = + Eio.Path.with_open_in path @@ fun f -> + Eio.File.stat f + +(** Return the header needed for a particular file on disk *) +let header_of_file ?level ?getpwuid ?getgrgid filepath : Tar.Header.t = + let level = match level with None -> Tar.Header.V7 | Some level -> level in + let stat = stat filepath in + let pwent = Option.map (fun f -> f stat.uid) getpwuid in + let grent = Option.map (fun f -> f stat.gid) getgrgid in + let uname = if level = V7 then Some "" else pwent in + let gname = if level = V7 then Some "" else grent in + let file_mode = stat.perm in + let user_id = stat.uid |> Int64.to_int in + let group_id = stat.gid |> Int64.to_int in + let file_size = stat.size |> Optint.Int63.to_int64 in + let mod_time = Int64.of_float stat.mtime in + let link_indicator = Tar.Header.Link.Normal in + let link_name = "" in + let devmajor = if level = Ustar then stat.dev |> Int64.to_int else 0 in + let devminor = if level = Ustar then stat.rdev |> Int64.to_int else 0 in + Monad.return (Tar.Header.make ~file_mode ~user_id ~group_id ~mod_time ~link_indicator ~link_name + ?uname ?gname ~devmajor ~devminor (snd filepath) file_size) + +let write_block ?level ?global (header: Tar.Header.t) (body: #Flow.sink -> unit) sink = + HW.write ?level ?global header (sink :> Flow.sink); + body sink; + really_write sink (Tar.Header.zero_padding header) + +let write_end sink = + really_write sink Tar.Header.zero_block; + really_write sink Tar.Header.zero_block + +(** Utility functions for operating over whole tar archives *) +module Archive = struct + + (** Read the next header, apply the function 'f' to the fd and the header. The function + should leave the fd positioned immediately after the datablock. Finally the function + skips past the zero padding to the next header *) + let with_next_file src ~(global: Tar.Header.Extended.t option) + (f: Eio.Flow.source -> Tar.Header.Extended.t option -> Tar.Header.t -> 'a) = + match get_next_header ~global src with + | Some (hdr, global) -> + let result = f src global hdr in + Reader.skip src (Tar.Header.compute_zero_padding_length hdr); + Some result + | None -> + None + + (** List the contents of a tar *) + let list ?level fd = + let rec loop global acc = + match get_next_header ?level ~global (fd :> Flow.source) with + | None -> Monad.return (List.rev acc) + | Some (hdr, global) -> + Reader.skip fd (Int64.to_int hdr.Tar.Header.file_size); + Reader.skip fd (Tar.Header.compute_zero_padding_length hdr); + loop global (hdr :: acc) in + loop None [] + + (** Extract the contents of a tar to directory 'dest' *) + let extract dest ifd = + let rec loop global () = + match get_next_header ~global ifd with + | None -> Monad.return_unit + | Some (hdr, global) -> + let filename = dest hdr.Tar.Header.file_name in + Eio.Path.(with_open_out ~create:(`Exclusive 0) filename) @@ fun ofd -> + copy_n ifd ofd hdr.Tar.Header.file_size; + Reader.skip ifd (Tar.Header.compute_zero_padding_length hdr); + loop global () + in + loop None () + + let transform ?level f (ifd : #Flow.source) (ofd : #Flow.sink) = + let rec loop global () = + match get_next_header ~global ifd with + | None -> Monad.return_unit + | Some (header', global') -> + let header = f header' in + let body = fun _ -> copy_n ifd ofd header.Tar.Header.file_size in + write_block ?level ?global:(if global <> global' then global' else None) header body ofd; + Reader.skip ifd (Tar.Header.compute_zero_padding_length header'); + loop global' () + in + loop None (); + write_end ofd + + (** Create a tar on file descriptor fd from the filename list + 'files' *) + let create ?getpwuid ?getgrgid files ofd = + let file filename = + let stat = stat filename in + if stat.kind <> `Regular_file then + (* Skipping, not a regular file. *) + Monad.return_unit + else begin + let hdr = header_of_file ?getpwuid ?getgrgid filename in + write_block hdr (fun ofd -> + Eio.Path.with_open_in filename @@ fun ifd -> + copy_n ifd ofd hdr.Tar.Header.file_size + ) ofd + end in + List.iter file files; + (* Add two empty blocks *) + write_end ofd + +end diff --git a/eio/tar_eio.mli b/eio/tar_eio.mli new file mode 100644 index 0000000..c604b7d --- /dev/null +++ b/eio/tar_eio.mli @@ -0,0 +1,69 @@ +(* + * Copyright (C) 2006-2009 Citrix Systems Inc. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + *) + +(** I/O for tar-formatted data *) + +(** Returns the next header block or None if two consecutive + zero-filled blocks are discovered. Assumes stream is positioned at the + possible start of a header block. + @raise End_of_file if the stream unexpectedly fails. *) +val get_next_header : ?level:Tar.Header.compatibility -> global:Tar.Header.Extended.t option -> Eio.Flow.source -> + (Tar.Header.t * Tar.Header.Extended.t option) option + +(** Return the header needed for a particular file on disk. [getpwuid] and [getgrgid] are optional + functions that should take the uid and gid respectively and return the passwd and group entry + names for each. These will be added to the header. *) +val header_of_file : + ?level:Tar.Header.compatibility -> + ?getpwuid:(int64 -> string) -> + ?getgrgid:(int64 -> string) -> + Eio.Fs.dir Eio.Path.t -> + Tar.Header.t + +module Archive : sig + (** Utility functions for operating over whole tar archives *) + + (** Read the next header, apply the function 'f' to the source and the header. The function + should leave the source positioned immediately after the datablock. Finally the function + skips past the zero padding to the next header. *) + val with_next_file : Eio.Flow.source -> global:Tar.Header.Extended.t option -> + (Eio.Flow.source -> Tar.Header.Extended.t option -> Tar.Header.t -> 'a) -> 'a option + + (** List the contents of a tar to stdout. *) + val list : ?level:Tar.Header.compatibility -> #Eio.Flow.source -> Tar.Header.t list + + (** [extract dest] extract the contents of a tar. + Apply [dest] on each source filename to change the destination + filename. It only supports extracting regular files from the + top-level of the archive. *) + val extract : (string -> Eio.Fs.dir Eio.Path.t) -> Eio.Flow.source -> unit + + (** [transform f src sink] applies [f] to the header of each + file in the tar inputted in [src], and writes the resulting + headers to [sink] preserving the content and structure of the + archive. *) + val transform : ?level:Tar.Header.compatibility -> (Tar.Header.t -> Tar.Header.t) -> #Eio.Flow.source -> #Eio.Flow.sink -> unit + + (** Create a tar in the sink from a list of file paths. It only supports regular files. + + See {! header_of_file} for the meaning of [getpwuid] and [getgrgid]. *) + val create : + ?getpwuid:(int64 -> string) -> + ?getgrgid:(int64 -> string) -> + Eio.Fs.dir Eio.Path.t list -> + #Eio.Flow.sink -> + unit +end diff --git a/tar-eio.opam b/tar-eio.opam new file mode 100644 index 0000000..259ce87 --- /dev/null +++ b/tar-eio.opam @@ -0,0 +1,38 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +synopsis: "Decode and encode tar format files using Eio" +description: """ +tar is a library to read and write tar files with an emphasis on +streaming. This library uses Eio to provide a portable tar library. +""" +maintainer: ["Reynir Björnsson " "dave@recoil.org"] +authors: ["Dave Scott" "Thomas Gazagnaire" "David Allsopp" "Antonin Décimo"] +license: "ISC" +tags: ["org:xapi-project" "org:mirage"] +homepage: "https://github.com/mirage/ocaml-tar" +doc: "https://mirage.github.io/ocaml-tar/" +bug-reports: "https://github.com/mirage/ocaml-tar/issues" +depends: [ + "dune" {>= "2.9"} + "ocaml" {>= "4.08.0"} + "eio" {>= "0.10.0"} + "tar" {= version} + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "--promote-install-files=false" + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] + ["dune" "install" "-p" name "--create-install-files" name] +] +dev-repo: "git+https://github.com/mirage/ocaml-tar.git"