Skip to content

Commit

Permalink
Add a way to produce a *.tar.gz archive from the new pure API
Browse files Browse the repository at this point in the history
  • Loading branch information
dinosaure committed May 15, 2024
1 parent 890c1fe commit 6f4a26b
Show file tree
Hide file tree
Showing 10 changed files with 234 additions and 256 deletions.
115 changes: 52 additions & 63 deletions bin/otar.ml
Original file line number Diff line number Diff line change
Expand Up @@ -18,60 +18,64 @@ let () = Printexc.record_backtrace true

let ( / ) = Filename.concat

let stream_of_fd fd =
let buf = Bytes.create 0x1000 in
fun () -> match Unix.read fd buf 0 (Bytes.length buf) with
| 0 -> None
| len ->
let str = Bytes.sub_string buf 0 len in
Some str
| exception End_of_file -> None
let contents_of_path path =
let fd = ref `None in
let buf = Bytes.create 0x100 in
let rec dispenser () = match !fd with
| `Closed -> Tar.return (Ok None)
| `None ->
let fd' = Unix.openfile path Unix.[ O_RDONLY; O_CLOEXEC ] 0o644 in
fd := `Active fd';
dispenser ()
| `Active fd' ->
match Unix.read fd' buf 0 (Bytes.length buf) with
| 0 | exception End_of_file ->
Unix.close fd'; fd := `Closed; Tar.return (Ok None)
| len ->
let str = Bytes.sub_string buf 0 len in
Tar.return (Ok (Some str)) in
dispenser

let always x = fun _ -> x

(*
let create_tarball directory oc =
let create_tarball directory fd =
let files = Sys.readdir directory in
let os = match Sys.os_type with
| "Win32" -> Gz.NTFS (* XXX(dinosaure): true? *)
| "Unix" | "Cygwin" | _ -> Gz.Unix in
let mtime = Unix.gettimeofday () in
let out_channel = Tar_gz.of_out_channel ~level:4 ~mtime:(Int32.of_float mtime) os oc in
let hdr = Tar.Header.make ~file_mode:0o755
~mod_time:(Int64.of_float mtime) (Filename.concat directory "") 0L in
(match Tar_gz.write_block ~level:Tar.Header.Ustar hdr out_channel (always None) with
| Ok () -> ()
| Error `Msg msg -> Format.eprintf "Error %s writing block\n%!" msg);
Array.iter begin fun filename ->
let fd = Unix.openfile (directory / filename) Unix.[ O_RDONLY; O_CLOEXEC ] 0o644 in
let stat = Unix.LargeFile.lstat (directory / filename) in
match stat.st_kind with
| Unix.S_REG ->
let stream = stream_of_fd fd in
let file_mode = if stat.Unix.LargeFile.st_perm land 0o111 <> 0 then 0o755 else 0o644 in
let mod_time = Int64.of_float stat.Unix.LargeFile.st_mtime in
let user_id = stat.Unix.LargeFile.st_uid in
let group_id = stat.Unix.LargeFile.st_gid in
let hdr = Tar.Header.make
~mod_time:(Int64.of_float mtime) (Filename.concat directory "") 0L in
let entries = Array.fold_left begin fun acc filename ->
let stat = Unix.LargeFile.stat (directory / filename) in
match stat.st_kind with
| Unix.S_REG ->
let file_mode = if stat.st_perm land 0o111 <> 0 then 0o755 else 0o644 in
let mod_time = Int64.of_float stat.Unix.LargeFile.st_mtime in
let user_id = stat.st_uid in
let group_id = stat.st_gid in
let level = Some Tar.Header.Ustar in
let hdr = Tar.Header.make
~file_mode ~mod_time ~user_id ~group_id
(directory / filename) stat.Unix.LargeFile.st_size in
(match Tar_gz.write_block ~level:Tar.Header.Ustar hdr out_channel stream with
| Ok () -> ()
| Error `Msg msg -> Format.eprintf "Error %s writing block\n%!" msg);
Unix.close fd ;
| _ ->
Format.eprintf "Skipping non-regular file %s\n" (Filename.concat directory filename)
end files ;
Tar_gz.write_end out_channel
(directory / filename) stat.st_size in
(level, hdr, contents_of_path (directory / filename)) :: acc
| _ -> acc end [] files in
let entries = List.to_seq entries in
let entries = Seq.to_dispenser entries in
let entries () = Tar.return (Ok (entries ())) in
let t = Tar.out ~level:Tar.Header.Ustar hdr entries in
let t = Tar_gz.out_gzipped ~level:4 ~mtime:(Int32.of_float mtime) os t in
match Tar_unix.run t fd with
| Ok () -> ()
| Error err ->
Format.eprintf "%s: %a\n%!" Sys.executable_name Tar_unix.pp_error err

let make directory oc =
let oc, oc_close, _gz = match oc with
| None -> stdout, ignore, false
let fd, fd_close = match oc with
| None -> Unix.stdout, ignore
| Some filename ->
let oc = open_out filename in
oc, (fun () -> close_out oc), Filename.extension filename = ".gz" in
create_tarball directory oc ; oc_close ()
*)
let fd = Unix.openfile filename Unix.[ O_TRUNC; O_CREAT; O_WRONLY; O_CLOEXEC ] 0o644 in
fd, (fun () -> Unix.close fd) in
Fun.protect ~finally:fd_close @@ fun () ->
create_tarball directory fd

let sizes = [| "B"; "KiB"; "MiB"; "GiB"; "TiB"; "PiB"; "EiB"; "ZiB"; "YiB" |]

Expand All @@ -89,46 +93,31 @@ let list filename =
hdr.Tar.Header.file_name
(Tar.Header.Link.to_string hdr.link_indicator)
(bytes_to_size ~decimals:2) hdr.Tar.Header.file_size ;
(*
(* Alternatively:
let padding = Tar.Header.compute_zero_padding_length hdr in
let data = Int64.to_int hdr.Tar.Header.file_size in
let to_skip = data + padding in *)
Tar_gz.skip ic to_skip ;
go global ()
| Error `Eof -> ()
| Error `Fatal e ->
Format.eprintf "Error listing archive: %a\n%!" Tar.pp_error e;
exit 2
*)
let open Tar in
let to_skip = Header.(Int64.to_int (to_sectors hdr) * length) in
let* _ = seek to_skip in
let* _ = seek (Int64.to_int hdr.Tar.Header.file_size) in
return (Ok ())
in
let fd = Unix.openfile filename [ Unix.O_RDONLY ] 0 in
match Tar_unix.run (Tar_gz.gzipped (Tar.fold go ())) fd with
match Tar_unix.run (Tar_gz.in_gzipped (Tar.fold go ())) fd with
| Ok () -> ()
| Error (`Unix _) ->
Format.eprintf "Some UNIX error occurred.\n%!"
| Error (`Msg e) ->
Format.eprintf "Some error: %s.\n%!" e
| Error `Unexpected_end_of_file ->
| Error (`Unexpected_end_of_file | `Eof) ->
Format.eprintf "Unexpected end of file.\n%!"
| Error `Eof | Error `Gz _ ->
Format.eprintf "Some fatal error occurred.\n%!"
| Error `Gz err ->
Format.eprintf "Some Gzip error occurred: %s.\n%!" err
| Error (`Fatal _) ->
Format.eprintf "Some fatal error occurred.\n%!"

let () = match Sys.argv with
| [| _; "list"; filename; |] when Sys.file_exists filename ->
list filename
(*
| [| _; directory |] when Sys.is_directory directory ->
make directory None
| [| _; directory; output |] when Sys.is_directory directory ->
make directory (Some output)
*)
| _ ->
let cmd = Filename.basename Sys.argv.(0) in
Format.eprintf "%s <directory> [<filename.tar.gz>]\n%s list <filename.tar.gz>\n" cmd cmd
42 changes: 41 additions & 1 deletion lib/tar.ml
Original file line number Diff line number Diff line change
Expand Up @@ -795,7 +795,7 @@ let encode_unextended_header ?level header =
let encode_extended_header ?level scope hdr =
let link_indicator, link_indicator_name = match scope with
| `Per_file -> Header.Link.PerFileExtendedHeader, "paxheader"
| `Global ->Header.Link.GlobalExtendedHeader, "pax_global_header"
| `Global -> Header.Link.GlobalExtendedHeader, "pax_global_header"
| _ -> assert false
in
let pax_payload = Header.Extended.marshal hdr in
Expand Down Expand Up @@ -825,12 +825,14 @@ type ('a, 'err, 't) t =
| Bind : ('a, 'err, 't) t * ('a -> ('b, 'err, 't) t) -> ('b, 'err, 't) t
| Return : ('a, 'err) result -> ('a, 'err, 't) t
| High : (('a, 'err) result, 't) io -> ('a, 'err, 't) t
| Write : string -> (unit, 'err, 't) t

let ( let* ) x f = Bind (x, f)
let return x = Return x
let really_read n = Really_read n
let read n = Read n
let seek n = Seek n
let write str = Write str

type ('a, 'err, 't) fold = (?global:Header.Extended.t -> Header.t -> 'a -> ('a, 'err, 't) t) -> 'a -> ('a, 'err, 't) t

Expand Down Expand Up @@ -859,3 +861,41 @@ let fold f init =
| Error `Eof -> return (Ok acc)
| Error `Fatal _ as e -> return e in
go (decode_state ()) init

let rec writev = function
| [] -> return (Ok ())
| x :: r ->
let* () = write x in
writev r

let rec pipe stream =
let* block = stream () in
match block with
| Some str -> let* () = writev [ str ] in pipe stream
| None -> return (Ok ())

type ('err, 't) content = unit -> (string option, 'err, 't) t
type ('err, 't) entry = Header.compatibility option * Header.t * ('err, 't) content
type ('err, 't) entries = unit -> (('err, 't) entry option, 'err, 't) t

let out ?level hdr entries =
let rec go () =
let* entry = entries () in
match entry with
| None ->
let* () = writev [ Header.zero_block; Header.zero_block ] in
return (Ok ())
| Some (level, hdr, stream) ->
match encode_header ?level hdr with
| Ok sstr ->
let* () = writev sstr in
let* () = pipe stream in
let* () = writev [ Header.zero_padding hdr ] in
go ()
| Error _ as err -> return err in
match encode_header ?level hdr with
| Error _ as err -> return err
| Ok sstr ->
let* () = writev sstr in
let* () = writev [ Header.zero_padding hdr ] in
go ()
15 changes: 15 additions & 0 deletions lib/tar.mli
Original file line number Diff line number Diff line change
Expand Up @@ -196,16 +196,31 @@ type ('a, 'err, 't) t =
| Bind : ('a, 'err, 't) t * ('a -> ('b, 'err, 't) t) -> ('b, 'err, 't) t
| Return : ('a, 'err) result -> ('a, 'err, 't) t
| High : (('a, 'err) result, 't) io -> ('a, 'err, 't) t
| Write : string -> (unit, 'err, 't) t

val really_read : int -> (string, _, _) t
val read : int -> (string, _, _) t
val seek : int -> (unit, _, _) t
val ( let* ) : ('a, 'err, 't) t -> ('a -> ('b, 'err, 't) t) -> ('b, 'err, 't) t
val return : ('a, 'err) result -> ('a, 'err, _) t
val write : string -> (unit, _, _) t

type ('a, 'err, 't) fold = (?global:Header.Extended.t -> Header.t -> 'a -> ('a, 'err, 't) t) -> 'a -> ('a, 'err, 't) t

val fold : ('a, [> `Fatal of error ], 't) fold
(** [fold f] is a [_ t] that reads an archive and executes [f] on each header.
[f] is expected to either read or skip the file contents, or return an
error. *)

type ('err, 't) content = unit -> (string option, 'err, 't) t
type ('err, 't) entry = Header.compatibility option * Header.t * ('err, 't) content
type ('err, 't) entries = unit -> (('err, 't) entry option, 'err, 't) t

val out :
?level:Header.compatibility
-> Header.t
-> ([> `Msg of string ] as 'err, 't) entries
-> (unit, 'err, 't) t
(** [out hdr entries] is a [_ t] that writes [entries] into an archive. [hdr] is
the global header and each entry must come from a {!type:content} stream and
the associated header.*)
Loading

0 comments on commit 6f4a26b

Please sign in to comment.