From 6c54ca0a5107f648fc3ed96800c1ba9e545d96ac Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Thu, 1 Feb 2024 23:53:06 +0100 Subject: [PATCH 01/34] revise a decoder and encoder, being pure --- lib/tar.ml | 170 +++++++++++++++++++++++++++++++++++++---- lib/tar.mli | 15 ++++ lib_test/parse_test.ml | 55 ++++++++++++- unix/tar_unix.ml | 2 +- 4 files changed, 223 insertions(+), 19 deletions(-) diff --git a/lib/tar.ml b/lib/tar.ml index 35b2403..25d7ac8 100644 --- a/lib/tar.ml +++ b/lib/tar.ml @@ -661,6 +661,104 @@ module Header = struct Int64.(div (add (pred (of_int length)) x.file_size) (of_int length)) end +let longlink = "././@LongLink" + +let fix_link_indicator x = + (* For backward compatibility we treat normal files ending in slash as + directories. Because [Link.of_char] treats unrecognized link indicator + values as normal files we check directly. This is not completely correct + as [Header.Link.of_char] turns unknown link indicators into + [Header.Link.Normal]. Ideally, it should only be done for '0' and + '\000'. *) + if String.length x.Header.file_name > 0 + && x.file_name.[String.length x.file_name - 1] = '/' + && x.link_indicator = Header.Link.Normal then + { x with link_indicator = Header.Link.Directory } + else + x + +type decode_state = { + global : Header.Extended.t option; + state : [ `Active of bool + | `Global_extended_header of Header.t + | `Per_file_extended_header of Header.t + | `Real_header of Header.Extended.t + | `Next_longlink of Header.t ]; + next_longlink : string option ; + next_longname : string option +} + +let decode_state ?global () = + { global ; state = `Active false ; next_longlink = None ; next_longname = None } + +let construct_header t (hdr : Header.t) = + let hdr = Option.fold ~none:hdr ~some:(fun file_name -> { hdr with file_name }) t.next_longname in + let hdr = Option.fold ~none:hdr ~some:(fun link_name -> { hdr with link_name }) t.next_longlink in + let hdr = fix_link_indicator hdr in + { t with next_longlink = None ; next_longname = None ; state = `Active false }, + hdr + +let decode t data = + match t.state with + | `Global_extended_header x -> + let* global = + (* unmarshal merges the previous global (if any) with the + discovered global (if any) and returns the new global. *) + Result.map_error (fun e -> `Fatal e) + (Header.Extended.unmarshal ~global:t.global data) + in + Ok ({ t with global = Some global ; state = `Active false }, + Some (`Skip (Header.compute_zero_padding_length x)), + Some global) + | `Per_file_extended_header x -> + let* extended = + Result.map_error + (fun e -> `Fatal e) + (Header.Extended.unmarshal ~global:t.global data) + in + Ok ({ t with state = `Real_header extended }, + Some (`Skip (Header.compute_zero_padding_length x)), + None) + | `Real_header extended -> + let* x = + Result.map_error + (fun _ -> `Fatal `Corrupt_pax_header) (* NB better error *) + (Header.unmarshal ~extended data) + in + let t, hdr = construct_header t x in + Ok (t, Some (`Header hdr), None) + | `Next_longlink x -> + let name = String.sub data 0 (String.length data - 1) in + let next_longlink = if x.Header.link_indicator = Header.Link.LongLink then Some name else t.next_longlink in + let next_longname = if x.Header.link_indicator = Header.Link.LongName then Some name else t.next_longname in + Ok ({ t with next_longlink ; next_longname ; state = `Active false }, + Some (`Skip (Header.compute_zero_padding_length x)), + None) + | `Active read_zero -> + match Header.unmarshal ?extended:t.global data with + | Ok x when x.Header.link_indicator = Header.Link.GlobalExtendedHeader -> + Ok ({ t with state = `Global_extended_header x }, + Some (`Read (Int64.to_int x.Header.file_size)), + None) + | Ok x when x.Header.link_indicator = Header.Link.PerFileExtendedHeader -> + Ok ({ t with state = `Per_file_extended_header x }, + Some (`Read (Int64.to_int x.Header.file_size)), + None) + | Ok ({ Header.link_indicator = Header.Link.LongLink | Header.Link.LongName; _ } as x) when x.Header.file_name = longlink -> + Ok ({ t with state = `Next_longlink x }, + Some (`Read (Int64.to_int x.Header.file_size)), + None) + | Ok x -> + let t, hdr = construct_header t x in + Ok (t, Some (`Header hdr), None) + | Error `Zero_block -> + if read_zero then + Error `Eof + else + Ok ({ t with state = `Active true }, None, None) + | Error ((`Checksum_mismatch | `Unmarshal _) as e) -> + Error (`Fatal e) + module type ASYNC = sig type 'a t val ( >>= ): 'a t -> ('a -> 'b t) -> 'b t @@ -694,8 +792,6 @@ module type HEADERWRITER = sig val write_global_extended_header : Header.Extended.t -> out_channel -> (unit, [> `Msg of string ]) result io end -let longlink = "././@LongLink" - module HeaderReader(Async: ASYNC)(Reader: READER with type 'a io = 'a Async.t) = struct open Async open Reader @@ -709,19 +805,6 @@ module HeaderReader(Async: ASYNC)(Reader: READER with type 'a io = 'a Async.t) = | Ok x -> f x | Error _ as e -> return e - let fix_link_indicator x = - (* For backward compatibility we treat normal files ending in slash as - directories. Because [Link.of_char] treats unrecognized link indicator - values as normal files we check directly. This is not completely correct - as [Header.Link.of_char] turns unknown link indicators into - [Header.Link.Normal]. Ideally, it should only be done for '0' and - '\000'. *) - if String.length x.Header.file_name > 0 - && x.file_name.[String.length x.file_name - 1] = '/' - && x.link_indicator = Header.Link.Normal then - { x with link_indicator = Header.Link.Directory } - else - x let read ~global (ifd: Reader.in_channel) : (Header.t * Header.Extended.t option, [ `Eof | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] ]) result t = (* We might need to read 2 headers at once if we encounter a Pax header *) @@ -798,6 +881,63 @@ module HeaderReader(Async: ASYNC)(Reader: READER with type 'a io = 'a Async.t) = end +let encode_long level link_indicator payload = + let blank = {Header.file_name = longlink; file_mode = 0; user_id = 0; group_id = 0; mod_time = 0L; file_size = 0L; link_indicator = Header.Link.LongLink; link_name = ""; uname = "root"; gname = "root"; devmajor = 0; devminor = 0; extended = None} in + let payload = payload ^ "\000" in + let file_size = String.length payload in + let blank = {blank with Header.file_size = Int64.of_int file_size} in + let buffer = Bytes.make Header.length '\000' in + let* () = Header.marshal ~level buffer { blank with link_indicator } in + Ok [ Bytes.unsafe_to_string buffer ; payload ; Header.zero_padding blank ] + +let encode_unextended_header ?level header = + let level = Header.get_level level in + let* pre = + if level = Header.GNU then + let* longlink = + if String.length header.Header.link_name > Header.sizeof_hdr_link_name then + encode_long level Header.Link.LongLink header.Header.link_name + else + Ok [] + in + let* longname = + if String.length header.Header.file_name > Header.sizeof_hdr_file_name then + encode_long level Header.Link.LongName header.Header.file_name + else + Ok [] + in + Ok (longlink @ longname) + else + Ok [] + in + let buffer = Bytes.make Header.length '\000' in + let* () = Header.marshal ~level buffer header in + Ok (pre @ [ Bytes.unsafe_to_string buffer ]) + +let encode_extended_header ?level scope hdr = + let link_indicator, link_indicator_name = match scope with + | `Per_file -> Header.Link.PerFileExtendedHeader, "paxheader" + | `Global ->Header.Link.GlobalExtendedHeader, "pax_global_header" + | _ -> assert false + in + let pax_payload = Header.Extended.marshal hdr in + let pax = + Header.make ~link_indicator link_indicator_name + (Int64.of_int @@ String.length pax_payload) + in + let* pax_hdr = encode_unextended_header ?level pax in + Ok (pax_hdr @ [ pax_payload ; Header.zero_padding pax ]) + +let encode_header ?level header = + let* extended = + Option.fold ~none:(Ok []) ~some:(encode_extended_header ?level `Per_file) header.Header.extended + in + let* rest = encode_unextended_header ?level header in + Ok (extended @ rest) + +let encode_global_extended_header global = + encode_extended_header `Global global + module HeaderWriter(Async: ASYNC)(Writer: WRITER with type 'a io = 'a Async.t) = struct open Async open Writer diff --git a/lib/tar.mli b/lib/tar.mli index c995969..6e4085b 100644 --- a/lib/tar.mli +++ b/lib/tar.mli @@ -139,6 +139,21 @@ module Header : sig val to_sectors: t -> int64 end +type decode_state + +val decode_state : ?global:Header.Extended.t -> unit -> decode_state + +val decode : decode_state -> string -> + (decode_state * [ `Read of int | `Skip of int | `Header of Header.t ] option * Header.Extended.t option, + [ `Eof | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] ]) + result + +val encode_header : ?level:Header.compatibility -> + Header.t -> (string list, [> `Msg of string ]) result + +val encode_global_extended_header : Header.Extended.t -> (string list, [> `Msg of string ]) result + + module type ASYNC = sig type 'a t val ( >>= ): 'a t -> ('a -> 'b t) -> 'b t diff --git a/lib_test/parse_test.ml b/lib_test/parse_test.ml index 05ee8e2..3570e9f 100644 --- a/lib_test/parse_test.ml +++ b/lib_test/parse_test.ml @@ -31,8 +31,57 @@ module Unix = struct if Sys.win32 then truncate (convert_path `Windows path) else truncate path end -let list fd = - let rec loop global acc = +let rec with_restart op fd buf off len = + try op fd buf off len with + Unix.Unix_error (Unix.EINTR,_,_) -> + with_restart op fd buf off len + +let really_read fd buf = + let len = Bytes.length buf in + let rec loop offset = + if offset < len then + let n = with_restart Unix.read fd buf offset (len - offset) in + if n = 0 then raise End_of_file; + loop (offset + n) + in + loop 0 + +let run_reader fd = + let rec loop ?b t acc = + let data = match b with + | None -> + let b = Bytes.create Tar.Header.length in + really_read fd b; + Bytes.unsafe_to_string b + | Some s -> s + in + match Tar.decode t data with + | Ok (t, Some `Header hdr, _global) -> + print_endline hdr.Tar.Header.file_name; + ignore (Unix.lseek fd + (Int64.to_int hdr.Tar.Header.file_size + Tar.Header.compute_zero_padding_length hdr) + Unix.SEEK_CUR); + loop t (hdr :: acc) + | Ok (t, Some `Skip n, _global) -> + ignore (Unix.lseek fd n Unix.SEEK_CUR); + loop t acc + | Ok (t, Some `Read n, _global) -> + let b = Bytes.create n in + really_read fd b; + let b = Bytes.unsafe_to_string b in + loop ~b t acc + | Ok (t, None, _global) -> + loop t acc + | Error `Eof -> List.rev acc + | Error `Fatal e -> Alcotest.failf "unexpected error: %a" Tar.pp_error e + in + let t = Tar.decode_state () in + let r = loop t [] in + List.iter (fun h -> print_endline h.Tar.Header.file_name) r; + r + +let list fd = run_reader fd +(* let rec loop global acc = match Tar_unix.HeaderReader.read ~global fd with | Ok (hdr, global) -> print_endline hdr.Tar.Header.file_name; @@ -45,7 +94,7 @@ let list fd = in let r = loop None [] in List.iter (fun h -> print_endline h.Tar.Header.file_name) r; - r + r*) let pp_header f x = Fmt.pf f "%s" (Tar.Header.to_detailed_string x) let header = Alcotest.testable pp_header ( = ) diff --git a/unix/tar_unix.ml b/unix/tar_unix.ml index a1c1548..cf39895 100644 --- a/unix/tar_unix.ml +++ b/unix/tar_unix.ml @@ -56,7 +56,7 @@ module HeaderWriter = Tar.HeaderWriter(Direct)(Driver) include Driver - (** Return the header needed for a particular file on disk *) +(** Return the header needed for a particular file on disk *) let header_of_file ?level (file: string) : Tar.Header.t = let level = Tar.Header.compatibility level in let stat = Unix.LargeFile.lstat file in From c67f94535a87a2d5ad27528fa1df2c8192978bef Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Fri, 2 Feb 2024 19:48:13 +0100 Subject: [PATCH 02/34] remove stuff --- lib/tar.ml | 213 ---------------------------------------------------- lib/tar.mli | 64 +++++----------- 2 files changed, 18 insertions(+), 259 deletions(-) diff --git a/lib/tar.ml b/lib/tar.ml index 25d7ac8..fad5cf7 100644 --- a/lib/tar.ml +++ b/lib/tar.ml @@ -759,128 +759,6 @@ let decode t data = | Error ((`Checksum_mismatch | `Unmarshal _) as e) -> Error (`Fatal e) -module type ASYNC = sig - type 'a t - val ( >>= ): 'a t -> ('a -> 'b t) -> 'b t - val return: 'a -> 'a t -end - -module type READER = sig - type in_channel - type 'a io - val really_read: in_channel -> bytes -> unit io - val skip: in_channel -> int -> unit io -end - -module type WRITER = sig - type out_channel - type 'a io - val really_write: out_channel -> string -> unit io -end - -module type HEADERREADER = sig - type in_channel - type 'a io - val read : global:Header.Extended.t option -> in_channel -> - (Header.t * Header.Extended.t option, [ `Eof | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] ]) result io -end - -module type HEADERWRITER = sig - type out_channel - type 'a io - val write : ?level:Header.compatibility -> Header.t -> out_channel -> (unit, [> `Msg of string ]) result io - val write_global_extended_header : Header.Extended.t -> out_channel -> (unit, [> `Msg of string ]) result io -end - -module HeaderReader(Async: ASYNC)(Reader: READER with type 'a io = 'a Async.t) = struct - open Async - open Reader - - type in_channel = Reader.in_channel - type 'a io = 'a t - - (* This is not a bind, but more a lift and bind combined. *) - let ( let^* ) x f = - match x with - | Ok x -> f x - | Error _ as e -> return e - - - let read ~global (ifd: Reader.in_channel) : (Header.t * Header.Extended.t option, [ `Eof | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] ]) result t = - (* We might need to read 2 headers at once if we encounter a Pax header *) - let buffer = Bytes.make Header.length '\000' in - let real_header_buf = Bytes.make Header.length '\000' in - - let next_block global () = - really_read ifd buffer >>= fun () -> - return (Header.unmarshal ?extended:global (Bytes.unsafe_to_string buffer)) - in - - let rec get_hdr ~next_longname ~next_longlink global () : (Header.t * Header.Extended.t option, [> `Eof | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] ]) result t = - next_block global () >>= function - | Ok x when x.Header.link_indicator = Header.Link.GlobalExtendedHeader -> - let extra_header_buf = Bytes.make (Int64.to_int x.Header.file_size) '\000' in - really_read ifd extra_header_buf >>= fun () -> - skip ifd (Header.compute_zero_padding_length x) >>= fun () -> - (* unmarshal merges the previous global (if any) with the - discovered global (if any) and returns the new global. *) - let^* global = - Result.map_error - (fun e -> `Fatal e) - (Header.Extended.unmarshal ~global (Bytes.unsafe_to_string extra_header_buf)) - in - get_hdr ~next_longname ~next_longlink (Some global) () - | Ok x when x.Header.link_indicator = Header.Link.PerFileExtendedHeader -> - let extra_header_buf = Bytes.make (Int64.to_int x.Header.file_size) '\000' in - really_read ifd extra_header_buf >>= fun () -> - skip ifd (Header.compute_zero_padding_length x) >>= fun () -> - let^* extended = - Result.map_error - (fun e -> `Fatal e) - (Header.Extended.unmarshal ~global (Bytes.unsafe_to_string extra_header_buf)) - in - really_read ifd real_header_buf >>= fun () -> - let^* x = - Result.map_error - (fun _ -> `Fatal `Corrupt_pax_header) - (Header.unmarshal ~extended (Bytes.unsafe_to_string real_header_buf)) - in - let x = fix_link_indicator x in - return (Ok (x, global)) - | Ok ({ Header.link_indicator = Header.Link.LongLink | Header.Link.LongName; _ } as x) when x.Header.file_name = longlink -> - let extra_header_buf = Bytes.create (Int64.to_int x.Header.file_size) in - really_read ifd extra_header_buf >>= fun () -> - skip ifd (Header.compute_zero_padding_length x) >>= fun () -> - let name = String.sub (Bytes.unsafe_to_string extra_header_buf) 0 (Bytes.length extra_header_buf - 1) in - let next_longlink = if x.Header.link_indicator = Header.Link.LongLink then Some name else next_longlink in - let next_longname = if x.Header.link_indicator = Header.Link.LongName then Some name else next_longname in - get_hdr ~next_longname ~next_longlink global () - | Ok x -> - (* XXX: unclear how/if pax headers should interact with gnu extensions *) - let x = match next_longname with - | None -> x - | Some file_name -> { x with file_name } - in - let x = match next_longlink with - | None -> x - | Some link_name -> { x with link_name } - in - let x = fix_link_indicator x in - return (Ok (x, global)) - | Error `Zero_block -> - begin - next_block global () >>= function - | Ok x -> return (Ok (x, global)) - | Error `Zero_block -> return (Error `Eof) - | Error ((`Checksum_mismatch | `Unmarshal _) as e) -> return (Error (`Fatal e)) - end - | Error ((`Checksum_mismatch | `Unmarshal _) as e) -> - return (Error (`Fatal e)) - in - get_hdr ~next_longname:None ~next_longlink:None global () - -end - let encode_long level link_indicator payload = let blank = {Header.file_name = longlink; file_mode = 0; user_id = 0; group_id = 0; mod_time = 0L; file_size = 0L; link_indicator = Header.Link.LongLink; link_name = ""; uname = "root"; gname = "root"; devmajor = 0; devminor = 0; extended = None} in let payload = payload ^ "\000" in @@ -937,94 +815,3 @@ let encode_header ?level header = let encode_global_extended_header global = encode_extended_header `Global global - -module HeaderWriter(Async: ASYNC)(Writer: WRITER with type 'a io = 'a Async.t) = struct - open Async - open Writer - - type out_channel = Writer.out_channel - type 'a io = 'a t - - let write_unextended ?level header fd = - let level = Header.compatibility level in - let blank = {Header.file_name = longlink; file_mode = 0; user_id = 0; group_id = 0; mod_time = 0L; file_size = 0L; link_indicator = Header.Link.LongLink; link_name = ""; uname = "root"; gname = "root"; devmajor = 0; devminor = 0; extended = None} in - (if level = Header.GNU then begin - begin - if String.length header.Header.link_name > Header.sizeof_hdr_link_name then begin - let file_size = String.length header.Header.link_name + 1 in - let blank = {blank with Header.file_size = Int64.of_int file_size} in - let buffer = Bytes.make Header.length '\000' in - match - Header.marshal ~level buffer { blank with link_indicator = Header.Link.LongLink } - with - | Error _ as e -> return e - | Ok () -> - really_write fd (Bytes.unsafe_to_string buffer) >>= fun () -> - let payload = header.Header.link_name ^ "\000" in - really_write fd payload >>= fun () -> - really_write fd (Header.zero_padding blank) >>= fun () -> - return (Ok ()) - end else - return (Ok ()) - end >>= function - | Error _ as e -> return e - | Ok () -> - begin - if String.length header.Header.file_name > Header.sizeof_hdr_file_name then begin - let file_size = String.length header.Header.file_name + 1 in - let blank = {blank with Header.file_size = Int64.of_int file_size} in - let buffer = Bytes.make Header.length '\000' in - match - Header.marshal ~level buffer { blank with link_indicator = Header.Link.LongName } - with - | Error _ as e -> return e - | Ok () -> - really_write fd (Bytes.unsafe_to_string buffer) >>= fun () -> - let payload = header.Header.file_name ^ "\000" in - really_write fd payload >>= fun () -> - really_write fd (Header.zero_padding blank) >>= fun () -> - return (Ok ()) - end else - return (Ok ()) - end >>= function - | Error _ as e -> return e - | Ok () -> return (Ok ()) - end else - return (Ok ())) >>= function - | Error _ as e -> return e - | Ok () -> - let buffer = Bytes.make Header.length '\000' in - match Header.marshal ~level buffer header with - | Error _ as e -> return e - | Ok () -> - really_write fd (Bytes.unsafe_to_string buffer) >>= fun () -> - return (Ok ()) - - let write_extended ?level ~link_indicator hdr fd = - let link_indicator_name = match link_indicator with - | Header.Link.PerFileExtendedHeader -> "paxheader" - | Header.Link.GlobalExtendedHeader -> "pax_global_header" - | _ -> assert false - in - let pax_payload = Header.Extended.marshal hdr in - let pax = Header.make ~link_indicator link_indicator_name - (Int64.of_int @@ String.length pax_payload) in - write_unextended ?level pax fd >>= function - | Error _ as e -> return e - | Ok () -> - really_write fd pax_payload >>= fun () -> - really_write fd (Header.zero_padding pax) >>= fun () -> - return (Ok ()) - - let write ?level header fd = - ( match header.Header.extended with - | None -> return (Ok ()) - | Some e -> - write_extended ?level ~link_indicator:Header.Link.PerFileExtendedHeader e fd ) - >>= function - | Error _ as e -> return e - | Ok () -> write_unextended ?level header fd - - let write_global_extended_header global fd = - write_extended ~link_indicator:Header.Link.GlobalExtendedHeader global fd -end diff --git a/lib/tar.mli b/lib/tar.mli index 6e4085b..b4f08b1 100644 --- a/lib/tar.mli +++ b/lib/tar.mli @@ -139,62 +139,34 @@ module Header : sig val to_sectors: t -> int64 end +(** {1 Decoding and encoding of a whole archive} *) + +(** The type of the decode state. *) type decode_state +(** [decode_state ~global ()] constructs a decode_state. *) val decode_state : ?global:Header.Extended.t -> unit -> decode_state +(** [decode t data] decodes [data] taking the current state [t] into account. + It may result on success in a new state, optionally some action that should + be done ([`Read] or [`Skip]), or a decoded [`Header]. Possibly a new global + PAX header is provided as well. + + If no [`Read] or [`Skip] is returned, the new state should be used with + [decode] with the next [Header.length] sized string, which will lead to + further decoding until [`Eof] (or an error) occurs. *) val decode : decode_state -> string -> (decode_state * [ `Read of int | `Skip of int | `Header of Header.t ] option * Header.Extended.t option, [ `Eof | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] ]) result +(** [encode_header ~level hdr] encodes the header with the provided [level] + (defaults to [V7]) into a list of strings to be written to the disk. + Once a header is written, the payload (padded to multiples of + [Header.length]) should follow. *) val encode_header : ?level:Header.compatibility -> Header.t -> (string list, [> `Msg of string ]) result +(** [encode_global_extended_header hdr] encodes the global extended header as + a list of strings. *) val encode_global_extended_header : Header.Extended.t -> (string list, [> `Msg of string ]) result - - -module type ASYNC = sig - type 'a t - val ( >>= ): 'a t -> ('a -> 'b t) -> 'b t - val return: 'a -> 'a t -end - -module type READER = sig - type in_channel - type 'a io - val really_read: in_channel -> bytes -> unit io - val skip: in_channel -> int -> unit io -end - -module type WRITER = sig - type out_channel - type 'a io - val really_write: out_channel -> string -> unit io -end - -module type HEADERREADER = sig - type in_channel - type 'a io - - (** Returns the next header block or error [`Eof] if two consecutive - zero-filled blocks are discovered. Assumes stream is positioned at the - possible start of a header block. - @param global Holds the current global pax extended header, if - any. Needs to be given to the next call to [read]. *) - val read : global:Header.Extended.t option -> in_channel -> - (Header.t * Header.Extended.t option, [ `Eof | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] ]) result io -end - -module type HEADERWRITER = sig - type out_channel - type 'a io - val write : ?level:Header.compatibility -> Header.t -> out_channel -> (unit, [> `Msg of string ]) result io - val write_global_extended_header : Header.Extended.t -> out_channel -> (unit, [> `Msg of string ]) result io -end - -module HeaderReader(Async: ASYNC)(Reader: READER with type 'a io = 'a Async.t) : - HEADERREADER with type in_channel = Reader.in_channel and type 'a io = 'a Async.t - -module HeaderWriter(Async: ASYNC)(Writer: WRITER with type 'a io = 'a Async.t) : - HEADERWRITER with type out_channel = Writer.out_channel and type 'a io = 'a Async.t From 9ccc73b3da29012a91a3f6895494106727c59c92 Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Sat, 3 Feb 2024 13:48:24 +0100 Subject: [PATCH 03/34] wip --- unix/tar_unix.mli | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/unix/tar_unix.mli b/unix/tar_unix.mli index b21ad57..086dd24 100644 --- a/unix/tar_unix.mli +++ b/unix/tar_unix.mli @@ -16,20 +16,18 @@ (** Unix I/O for tar-formatted data. *) -val really_read: Unix.file_descr -> bytes -> unit -(** [really_read fd buf] fills [buf] with data from [fd] or raises - {!Stdlib.End_of_file}. *) - -val really_write: Unix.file_descr -> string -> unit -(** [really_write fd buf] writes the full contents of [buf] to [fd] - or {!Stdlib.End_of_file}. *) - -val skip : Unix.file_descr -> int -> unit -(** [skip fd n] reads [n] bytes from [fd] and discards them. If possible, you - should use [Unix.lseek fd n Unix.SEEK_CUR] instead. *) - (** Return the header needed for a particular file on disk. *) val header_of_file : ?level:Tar.Header.compatibility -> string -> Tar.Header.t -module HeaderReader : Tar.HEADERREADER with type in_channel = Unix.file_descr and type 'a io = 'a -module HeaderWriter : Tar.HEADERWRITER with type out_channel = Unix.file_descr and type 'a io = 'a +(** Fold over a tar archive. *) +val fold_tar : + ((Tar.Header.t * Tar.Header.Extended.t option, + [ + | `Eof + | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] + | `Unix of Unix.error + ]) result -> 'a -> 'a) -> + Unix.file_descr -> + 'a -> 'a + +val append_file : ?header:Tar.Header.t -> string -> From ebabd3cd2d534f77f22366e0e49ae9be3c893a19 Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Sat, 3 Feb 2024 16:52:27 +0100 Subject: [PATCH 04/34] fix --- lib/tar.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/tar.ml b/lib/tar.ml index fad5cf7..0da894c 100644 --- a/lib/tar.ml +++ b/lib/tar.ml @@ -769,7 +769,7 @@ let encode_long level link_indicator payload = Ok [ Bytes.unsafe_to_string buffer ; payload ; Header.zero_padding blank ] let encode_unextended_header ?level header = - let level = Header.get_level level in + let level = Header.compatibility level in let* pre = if level = Header.GNU then let* longlink = From ce9337b1d819aca74b6602f4638623ee32699af9 Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Sat, 3 Feb 2024 16:52:42 +0100 Subject: [PATCH 05/34] proposed API --- unix/tar_unix.mli | 48 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/unix/tar_unix.mli b/unix/tar_unix.mli index 086dd24..a6f61b5 100644 --- a/unix/tar_unix.mli +++ b/unix/tar_unix.mli @@ -16,18 +16,44 @@ (** Unix I/O for tar-formatted data. *) -(** Return the header needed for a particular file on disk. *) -val header_of_file : ?level:Tar.Header.compatibility -> string -> Tar.Header.t - -(** Fold over a tar archive. *) -val fold_tar : - ((Tar.Header.t * Tar.Header.Extended.t option, +(** [fold f filename acc] folds over the tar archive. The function [f] is called + for each [hdr : Tar.Header.t]. It should forward the position in the file + descriptor by [hdr.Tar.Header.file_size]. *) +val fold : + ((Unix.file_descr * Tar.Header.t * Tar.Header.Extended.t option, [ - | `Eof | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] - | `Unix of Unix.error + | `Unix of Unix.error * string * string + | `End_of_file ]) result -> 'a -> 'a) -> - Unix.file_descr -> - 'a -> 'a + string -> 'a -> 'a + +(** [extract ~src ~dst] extracts the tar archive [src] into the directory [dst]. + If [dst] does not exist, it is created. *) +val extract : src:string -> dst:string -> + (unit, [ `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] + | `Unix of Unix.error ]) result + +(** [create ~level ~src ~dst] creates a tar archive at [dst]. It uses [src], a + filename or directory name, as input. *) +val create : ?level:Tar.Header.compatibility -> src:string -> dst:string -> + (unit, [ `Msg of string | `Unix of Unix.error ]) result + +(** [header_of_file ~level filename] returns the tar header of [filename]. *) +val header_of_file : ?level:Tar.Header.compatibility -> string -> Tar.Header.t + +(** [append_file ~level ~header filename fd] appends the contents of [filename] + to the tar archive [fd]. If [header] is not provided, {header_of_file} is + used for constructing a header. *) +val append_file : ?level:Tar.Header.compatibility -> ?header:Tar.Header.t -> + string -> Unix.file_descr -> + (unit, [ `Msg of string | `Unix of Unix.error ]) result + +(** [write_extended_header ~level hdr fd] writes the extended header [hdr] to + [fd]. *) +val write_extended_header : ?level:Tar.Header.compatibility -> + Tar.Header.Extended.t -> Unix.file_descr -> + (unit, [ `Msg of string | `Unix of Unix.error ]) result -val append_file : ?header:Tar.Header.t -> string -> +(** [write_end fd] writes the tar end marker to [fd]. *) +val write_end : Unix.file_descr -> (unit, Unix.error) result From 50f66596026afac40730f563d7d76630cf5857f8 Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Sat, 3 Feb 2024 16:59:24 +0100 Subject: [PATCH 06/34] add filter --- unix/tar_unix.mli | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/unix/tar_unix.mli b/unix/tar_unix.mli index a6f61b5..cbea997 100644 --- a/unix/tar_unix.mli +++ b/unix/tar_unix.mli @@ -28,15 +28,23 @@ val fold : ]) result -> 'a -> 'a) -> string -> 'a -> 'a -(** [extract ~src ~dst] extracts the tar archive [src] into the directory [dst]. - If [dst] does not exist, it is created. *) -val extract : src:string -> dst:string -> +(** [extract ~filter ~src ~dst] extracts the tar archive [src] into the + directory [dst]. If [dst] does not exist, it is created. If [filter] is + provided (defaults to [fun _ -> true]), any file where [filter hdr] returns + [false], is skipped. *) +val extract : + ?filter:(Tar.Header.t -> bool) -> + src:string -> dst:string -> (unit, [ `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] | `Unix of Unix.error ]) result -(** [create ~level ~src ~dst] creates a tar archive at [dst]. It uses [src], a - filename or directory name, as input. *) -val create : ?level:Tar.Header.compatibility -> src:string -> dst:string -> +(** [create ~level ~filter ~src ~dst] creates a tar archive at [dst]. It uses + [src], a filename or directory name, as input. If [filter] is provided + (defaults to [fun _ -> true]), any file where [filter hdr] returns [false] + is skipped. *) +val create : ?level:Tar.Header.compatibility -> + ?filter:(Tar.Header.t -> bool) -> + src:string -> dst:string -> (unit, [ `Msg of string | `Unix of Unix.error ]) result (** [header_of_file ~level filename] returns the tar header of [filename]. *) From 1b4ae554bc3e95c4009b346a67b4a87868019d57 Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Sat, 3 Feb 2024 22:35:56 +0100 Subject: [PATCH 07/34] initial compiling tar_unix --- lib/tar.ml | 4 +- lib/tar.mli | 2 +- unix/tar_unix.ml | 259 +++++++++++++++++++++++++++++++++++++++------- unix/tar_unix.mli | 50 +++++---- 4 files changed, 253 insertions(+), 62 deletions(-) diff --git a/lib/tar.ml b/lib/tar.ml index 0da894c..7fe8709 100644 --- a/lib/tar.ml +++ b/lib/tar.ml @@ -813,5 +813,5 @@ let encode_header ?level header = let* rest = encode_unextended_header ?level header in Ok (extended @ rest) -let encode_global_extended_header global = - encode_extended_header `Global global +let encode_global_extended_header ?level global = + encode_extended_header ?level `Global global diff --git a/lib/tar.mli b/lib/tar.mli index b4f08b1..f0a24de 100644 --- a/lib/tar.mli +++ b/lib/tar.mli @@ -169,4 +169,4 @@ val encode_header : ?level:Header.compatibility -> (** [encode_global_extended_header hdr] encodes the global extended header as a list of strings. *) -val encode_global_extended_header : Header.Extended.t -> (string list, [> `Msg of string ]) result +val encode_global_extended_header : ?level:Header.compatibility -> Header.Extended.t -> (string list, [> `Msg of string ]) result diff --git a/unix/tar_unix.ml b/unix/tar_unix.ml index cf39895..d9c858e 100644 --- a/unix/tar_unix.ml +++ b/unix/tar_unix.ml @@ -15,60 +15,243 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. *) -module Direct = struct - type 'a t = 'a - let return x = x - let ( >>= ) m f = f m -end - -module Driver = struct - type 'a io = 'a Direct.t - type in_channel = Unix.file_descr - type out_channel = Unix.file_descr - - let rec with_restart op fd buf off len = - try op fd buf off len with - Unix.Unix_error (Unix.EINTR,_,_) -> - with_restart op fd buf off len - - let really_read fd buf = - let len = Bytes.length buf in - let rec loop offset = - if offset < len then - let n = with_restart Unix.read fd buf offset (len - offset) in - if n = 0 then raise End_of_file; +let ( let* ) = Result.bind + +let rec safe ~off f a = + try Ok (f a) with + | Unix.Unix_error (Unix.EINTR, _, _) -> safe ~off f a + | Unix.Unix_error (e, f, s) -> Error (`Unix (off, e, f, s)) + +let safe_close fd = + try Unix.close fd with _ -> () + +let read_complete ~off fd buf len = + let rec loop offset = + if offset < len then + let* n = safe ~off (Unix.read fd buf offset) (len - offset) in + if n = 0 then + Error (`Unexpected_end_of_file off) + else loop (offset + n) + else + Ok () + in + loop 0 + +let seek ~off fd n = + safe ~off (Unix.lseek fd n) Unix.SEEK_CUR + +type decode_error = [ + | `Fatal of int * [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] + | `Unix of int * Unix.error * string * string + | `Unexpected_end_of_file of int + | `Msg of int * string +] + +let pp_decode_error ppf = function + | `Fatal (off, err) -> + Format.fprintf ppf "Offset %u, %a" off Tar.pp_error err + | `Unix (off, err, fname, arg) -> + Format.fprintf ppf "Offset %u, Unix error %s (function %s, arg %s)" off + (Unix.error_message err) fname arg + | `Unexpected_end_of_file off -> + Format.fprintf ppf "Offset %u unexpected end of file" off + | `Msg (off, msg) -> + Format.fprintf ppf "Offset %u error %s" off msg + +let fold f filename init = + let* fd = safe ~off:0 Unix.(openfile filename [ O_RDONLY ]) 0 in + let rec go ~off t fd ?global ?data acc = + let* data = match data with + | None -> + let buf = Bytes.make Tar.Header.length '\000' in + let* () = read_complete ~off fd buf Tar.Header.length in + Ok (Bytes.unsafe_to_string buf) + | Some data -> Ok data in - loop 0 + match Tar.decode t data with + | Ok (t, Some `Header hdr, g) -> + let global = Option.fold ~none:global ~some:(fun g -> Some g) g in + let* acc' = + Result.map_error + (fun (`Msg s) -> `Msg (off, s)) + (f fd ?global hdr acc) + in + let* off = seek ~off fd (Tar.Header.compute_zero_padding_length hdr) in + go ~off t fd ?global acc' + | Ok (t, Some `Skip n, g) -> + let global = Option.fold ~none:global ~some:(fun g -> Some g) g in + let* off = seek ~off fd n in + go ~off t fd ?global acc + | Ok (t, Some `Read n, g) -> + let global = Option.fold ~none:global ~some:(fun g -> Some g) g in + let buf = Bytes.make n '\000' in + let* () = read_complete ~off fd buf n in + let data = Bytes.unsafe_to_string buf in + go ~off:(off + n) t fd ?global ~data acc + | Ok (t, None, g) -> + let global = Option.fold ~none:global ~some:(fun g -> Some g) g in + go ~off t fd ?global acc + | Error `Eof -> Ok acc + | Error `Fatal e -> Error (`Fatal (off, e)) + in + Fun.protect + ~finally:(fun () -> safe_close fd) + (fun () -> go ~off:0 (Tar.decode_state ()) fd init) - let skip fd n = - ignore (Unix.lseek fd n Unix.SEEK_CUR) +let map_to_msg = function + | `Unix (_off, e, f, s) -> + `Msg (Format.sprintf "error %s in function %s %s" + (Unix.error_message e) f s) - let really_write fd buf = - let offset = ref 0 in - while !offset < String.length buf do - offset := !offset + with_restart Unix.write_substring fd buf !offset (String.length buf - !offset) - done -end +let copy ~src_fd ~dst_fd len = + let blen = 65536 in + let buffer = Bytes.make blen '\000' in + let rec read_write ~src_fd ~dst_fd len = + if len = 0 then + Ok () + else + let l = min blen len in + let* () = + Result.map_error + (function + | `Unix _ as e -> map_to_msg e + | `Unexpected_end_of_file _off -> + `Msg ("Unexpected end of file")) + (read_complete ~off:0 src_fd buffer l) + in + let* _written = + Result.map_error map_to_msg + (safe ~off:0 (Unix.write dst_fd buffer 0) l) + in + read_write ~src_fd ~dst_fd (len - l) + in + read_write ~src_fd ~dst_fd len -module HeaderReader = Tar.HeaderReader(Direct)(Driver) -module HeaderWriter = Tar.HeaderWriter(Direct)(Driver) +let extract ?(filter = fun _ -> true) ~src dst = + let f fd ?global:_ hdr () = + if filter hdr then + match hdr.Tar.Header.link_indicator with + | Tar.Header.Link.Normal -> + let* dst = + Result.map_error map_to_msg + (safe ~off:0 Unix.(openfile (Filename.concat dst hdr.Tar.Header.file_name) + [ O_WRONLY ; O_CREAT ]) hdr.Tar.Header.file_mode) + in + Fun.protect ~finally:(fun () -> safe_close dst) + (fun () -> copy ~src_fd:fd ~dst_fd:dst (Int64.to_int hdr.Tar.Header.file_size)) + (* TODO set owner / mode / mtime etc. *) + | _ -> Error (`Msg "not yet handled") + else + let* _off = + Result.map_error (fun (`Unix (_off, e, f, s)) -> + `Msg (Format.sprintf "error %s in function %s %s" + (Unix.error_message e) f s)) + (seek ~off:0 fd (Int64.to_int hdr.Tar.Header.file_size)) + in + Ok () + in + fold f src () -include Driver (** Return the header needed for a particular file on disk *) -let header_of_file ?level (file: string) : Tar.Header.t = +let header_of_file ?level file = let level = Tar.Header.compatibility level in - let stat = Unix.LargeFile.lstat file in + let* stat = safe ~off:0 Unix.LargeFile.lstat file in let file_mode = stat.Unix.LargeFile.st_perm in let user_id = stat.Unix.LargeFile.st_uid in let group_id = stat.Unix.LargeFile.st_gid in let mod_time = Int64.of_float stat.Unix.LargeFile.st_mtime in + (* TODO evaluate stat.st_kind *) let link_indicator = Tar.Header.Link.Normal in let link_name = "" in let uname = if level = V7 then "" else (Unix.getpwuid stat.Unix.LargeFile.st_uid).Unix.pw_name in let devmajor = if level = Ustar then stat.Unix.LargeFile.st_dev else 0 in let gname = if level = V7 then "" else (Unix.getgrgid stat.Unix.LargeFile.st_gid).Unix.gr_name in let devminor = if level = Ustar then stat.Unix.LargeFile.st_rdev else 0 in - Tar.Header.make ~file_mode ~user_id ~group_id ~mod_time ~link_indicator ~link_name - ~uname ~gname ~devmajor ~devminor file stat.Unix.LargeFile.st_size + Ok (Tar.Header.make ~file_mode ~user_id ~group_id ~mod_time ~link_indicator ~link_name + ~uname ~gname ~devmajor ~devminor file stat.Unix.LargeFile.st_size) + +let append_file ?level ?header filename fd = + let* header = match header with + | None -> header_of_file ?level filename + | Some x -> Ok x + in + let* header_strings = Tar.encode_header ?level header in + let* _off = + List.fold_left (fun acc d -> + let* _off = acc in + Result.map_error map_to_msg + (safe ~off:0 (Unix.write_substring fd d 0) (String.length d))) + (Ok 0) header_strings + in + let* src = + Result.map_error (fun (`Unix (_off, e, f, s)) -> + `Msg (Format.sprintf "error %s in function %s %s" + (Unix.error_message e) f s)) + (safe ~off:0 Unix.(openfile filename [ O_RDONLY ]) 0) + in + (* TOCTOU [also, header may not be valid for file] *) + Fun.protect ~finally:(fun () -> safe_close src) + (fun () -> copy ~src_fd:src ~dst_fd:fd + (Int64.to_int header.Tar.Header.file_size)) + +let write_global_extended_header ?level header fd = + let* header_strings = Tar.encode_global_extended_header ?level header in + let* _off = + List.fold_left (fun acc d -> + let* _off = acc in + Result.map_error map_to_msg + (safe ~off:0 (Unix.write_substring fd d 0) (String.length d))) + (Ok 0) header_strings + in + Ok () + +let write_end fd = + let* _written = + Result.map_error map_to_msg + (safe ~off:0 + (Unix.write_substring fd (Tar.Header.zero_block ^ Tar.Header.zero_block) 0) + (Tar.Header.length + Tar.Header.length)) + in + Ok () + +let create ?level ?global ?(filter = fun _ -> true) ~src dst = + let* dst_fd = + Result.map_error map_to_msg + (safe ~off:0 Unix.(openfile dst [ O_WRONLY ; O_CREAT ]) + 0o644) + in + Fun.protect ~finally:(fun () -> safe_close dst_fd) + (fun () -> + let* () = match global with + | None -> Ok () + | Some hdr -> + write_global_extended_header ?level hdr dst_fd + in + let rec copy_files directory = + let* dir = safe ~off:0 Unix.opendir directory in + Fun.protect ~finally:(fun () -> try Unix.closedir dir with _ -> ()) + (fun () -> + let rec next () = + try + let* name = safe ~off:0 Unix.readdir dir in + let filename = Filename.concat directory name in + let* header = header_of_file ?level filename in + if filter header then + match header.Tar.Header.link_indicator with + | Normal -> + let* () = append_file ?level ~header filename dst_fd in + next () + | Directory -> + (* TODO first finish curdir (and close the dir fd), then go deeper *) + let* () = copy_files filename in + next () + | _ -> Ok () (* NYI *) + else Ok () + with End_of_file -> Ok () + in + next ()) + in + let* () = copy_files src in + write_end dst_fd) diff --git a/unix/tar_unix.mli b/unix/tar_unix.mli index cbea997..996cfc2 100644 --- a/unix/tar_unix.mli +++ b/unix/tar_unix.mli @@ -16,52 +16,60 @@ (** Unix I/O for tar-formatted data. *) +(* TODO provide a type error and a pretty-printer *) + +type decode_error = [ + | `Fatal of int * [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] + | `Unix of int * Unix.error * string * string + | `Unexpected_end_of_file of int + | `Msg of int * string +] + +val pp_decode_error : Format.formatter -> decode_error -> unit + (** [fold f filename acc] folds over the tar archive. The function [f] is called for each [hdr : Tar.Header.t]. It should forward the position in the file descriptor by [hdr.Tar.Header.file_size]. *) val fold : - ((Unix.file_descr * Tar.Header.t * Tar.Header.Extended.t option, - [ - | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] - | `Unix of Unix.error * string * string - | `End_of_file - ]) result -> 'a -> 'a) -> - string -> 'a -> 'a + (Unix.file_descr -> ?global:Tar.Header.Extended.t -> Tar.Header.t -> + 'a -> ('a, [ `Msg of string ]) result) -> + string -> 'a -> ('a, decode_error) result -(** [extract ~filter ~src ~dst] extracts the tar archive [src] into the +(** [extract ~filter ~src dst] extracts the tar archive [src] into the directory [dst]. If [dst] does not exist, it is created. If [filter] is provided (defaults to [fun _ -> true]), any file where [filter hdr] returns [false], is skipped. *) val extract : ?filter:(Tar.Header.t -> bool) -> - src:string -> dst:string -> - (unit, [ `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] - | `Unix of Unix.error ]) result + src:string -> string -> + (unit, decode_error) result -(** [create ~level ~filter ~src ~dst] creates a tar archive at [dst]. It uses - [src], a filename or directory name, as input. If [filter] is provided +(** [create ~level ~filter ~src dst] creates a tar archive at [dst]. It uses + [src], a directory name, as input. If [filter] is provided (defaults to [fun _ -> true]), any file where [filter hdr] returns [false] is skipped. *) val create : ?level:Tar.Header.compatibility -> + ?global:Tar.Header.Extended.t -> ?filter:(Tar.Header.t -> bool) -> - src:string -> dst:string -> - (unit, [ `Msg of string | `Unix of Unix.error ]) result + src:string -> string -> + (unit, [ `Msg of string | `Unix of (int * Unix.error * string * string) ]) result (** [header_of_file ~level filename] returns the tar header of [filename]. *) -val header_of_file : ?level:Tar.Header.compatibility -> string -> Tar.Header.t +val header_of_file : ?level:Tar.Header.compatibility -> string -> + (Tar.Header.t, [ `Unix of (int * Unix.error * string * string) ]) result (** [append_file ~level ~header filename fd] appends the contents of [filename] to the tar archive [fd]. If [header] is not provided, {header_of_file} is used for constructing a header. *) val append_file : ?level:Tar.Header.compatibility -> ?header:Tar.Header.t -> string -> Unix.file_descr -> - (unit, [ `Msg of string | `Unix of Unix.error ]) result + (unit, [ `Msg of string | `Unix of (int * Unix.error * string * string) ]) result -(** [write_extended_header ~level hdr fd] writes the extended header [hdr] to +(** [write_global_extended_header ~level hdr fd] writes the extended header [hdr] to [fd]. *) -val write_extended_header : ?level:Tar.Header.compatibility -> +val write_global_extended_header : ?level:Tar.Header.compatibility -> Tar.Header.Extended.t -> Unix.file_descr -> - (unit, [ `Msg of string | `Unix of Unix.error ]) result + (unit, [ `Msg of string | `Unix of (int * Unix.error * string * string) ]) result (** [write_end fd] writes the tar end marker to [fd]. *) -val write_end : Unix.file_descr -> (unit, Unix.error) result +val write_end : Unix.file_descr -> (unit, [ `Msg of string ]) result From 984ffe097c89981e3b46e992bbb332433604b389 Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Sun, 4 Feb 2024 00:15:49 +0100 Subject: [PATCH 08/34] remove offset nonsense --- unix/tar_unix.ml | 129 +++++++++++++++++++++------------------------- unix/tar_unix.mli | 22 ++++---- 2 files changed, 69 insertions(+), 82 deletions(-) diff --git a/unix/tar_unix.ml b/unix/tar_unix.ml index d9c858e..340462e 100644 --- a/unix/tar_unix.ml +++ b/unix/tar_unix.ml @@ -17,20 +17,20 @@ let ( let* ) = Result.bind -let rec safe ~off f a = +let rec safe f a = try Ok (f a) with - | Unix.Unix_error (Unix.EINTR, _, _) -> safe ~off f a - | Unix.Unix_error (e, f, s) -> Error (`Unix (off, e, f, s)) + | Unix.Unix_error (Unix.EINTR, _, _) -> safe f a + | Unix.Unix_error (e, f, s) -> Error (`Unix (e, f, s)) let safe_close fd = try Unix.close fd with _ -> () -let read_complete ~off fd buf len = +let read_complete fd buf len = let rec loop offset = if offset < len then - let* n = safe ~off (Unix.read fd buf offset) (len - offset) in + let* n = safe (Unix.read fd buf offset) (len - offset) in if n = 0 then - Error (`Unexpected_end_of_file off) + Error `Unexpected_end_of_file else loop (offset + n) else @@ -38,69 +38,64 @@ let read_complete ~off fd buf len = in loop 0 -let seek ~off fd n = - safe ~off (Unix.lseek fd n) Unix.SEEK_CUR +let seek fd n = + safe (Unix.lseek fd n) Unix.SEEK_CUR type decode_error = [ - | `Fatal of int * [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] - | `Unix of int * Unix.error * string * string - | `Unexpected_end_of_file of int - | `Msg of int * string + | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] + | `Unix of Unix.error * string * string + | `Unexpected_end_of_file + | `Msg of string ] let pp_decode_error ppf = function - | `Fatal (off, err) -> - Format.fprintf ppf "Offset %u, %a" off Tar.pp_error err - | `Unix (off, err, fname, arg) -> - Format.fprintf ppf "Offset %u, Unix error %s (function %s, arg %s)" off + | `Fatal err -> Tar.pp_error ppf err + | `Unix (err, fname, arg) -> + Format.fprintf ppf "Unix error %s (function %s, arg %s)" (Unix.error_message err) fname arg - | `Unexpected_end_of_file off -> - Format.fprintf ppf "Offset %u unexpected end of file" off - | `Msg (off, msg) -> - Format.fprintf ppf "Offset %u error %s" off msg + | `Unexpected_end_of_file -> + Format.fprintf ppf "Unexpected end of file" + | `Msg msg -> + Format.fprintf ppf "Error %s" msg let fold f filename init = - let* fd = safe ~off:0 Unix.(openfile filename [ O_RDONLY ]) 0 in - let rec go ~off t fd ?global ?data acc = + let* fd = safe Unix.(openfile filename [ O_RDONLY ]) 0 in + let rec go t fd ?global ?data acc = let* data = match data with | None -> let buf = Bytes.make Tar.Header.length '\000' in - let* () = read_complete ~off fd buf Tar.Header.length in + let* () = read_complete fd buf Tar.Header.length in Ok (Bytes.unsafe_to_string buf) | Some data -> Ok data in match Tar.decode t data with | Ok (t, Some `Header hdr, g) -> let global = Option.fold ~none:global ~some:(fun g -> Some g) g in - let* acc' = - Result.map_error - (fun (`Msg s) -> `Msg (off, s)) - (f fd ?global hdr acc) - in - let* off = seek ~off fd (Tar.Header.compute_zero_padding_length hdr) in - go ~off t fd ?global acc' + let* acc' = f fd ?global hdr acc in + let* _off = seek fd (Tar.Header.compute_zero_padding_length hdr) in + go t fd ?global acc' | Ok (t, Some `Skip n, g) -> let global = Option.fold ~none:global ~some:(fun g -> Some g) g in - let* off = seek ~off fd n in - go ~off t fd ?global acc + let* _off = seek fd n in + go t fd ?global acc | Ok (t, Some `Read n, g) -> let global = Option.fold ~none:global ~some:(fun g -> Some g) g in let buf = Bytes.make n '\000' in - let* () = read_complete ~off fd buf n in + let* () = read_complete fd buf n in let data = Bytes.unsafe_to_string buf in - go ~off:(off + n) t fd ?global ~data acc + go t fd ?global ~data acc | Ok (t, None, g) -> let global = Option.fold ~none:global ~some:(fun g -> Some g) g in - go ~off t fd ?global acc + go t fd ?global acc | Error `Eof -> Ok acc - | Error `Fatal e -> Error (`Fatal (off, e)) + | Error `Fatal _ as e -> e in Fun.protect ~finally:(fun () -> safe_close fd) - (fun () -> go ~off:0 (Tar.decode_state ()) fd init) + (fun () -> go (Tar.decode_state ()) fd init) -let map_to_msg = function - | `Unix (_off, e, f, s) -> +let unix_err_to_msg = function + | `Unix (e, f, s) -> `Msg (Format.sprintf "error %s in function %s %s" (Unix.error_message e) f s) @@ -115,14 +110,14 @@ let copy ~src_fd ~dst_fd len = let* () = Result.map_error (function - | `Unix _ as e -> map_to_msg e - | `Unexpected_end_of_file _off -> - `Msg ("Unexpected end of file")) - (read_complete ~off:0 src_fd buffer l) + | `Unix _ as e -> unix_err_to_msg e + | `Unexpected_end_of_file -> + `Msg "Unexpected end of file") + (read_complete src_fd buffer l) in let* _written = - Result.map_error map_to_msg - (safe ~off:0 (Unix.write dst_fd buffer 0) l) + Result.map_error unix_err_to_msg + (safe (Unix.write dst_fd buffer 0) l) in read_write ~src_fd ~dst_fd (len - l) in @@ -134,30 +129,27 @@ let extract ?(filter = fun _ -> true) ~src dst = match hdr.Tar.Header.link_indicator with | Tar.Header.Link.Normal -> let* dst = - Result.map_error map_to_msg - (safe ~off:0 Unix.(openfile (Filename.concat dst hdr.Tar.Header.file_name) - [ O_WRONLY ; O_CREAT ]) hdr.Tar.Header.file_mode) + Result.map_error unix_err_to_msg + (safe Unix.(openfile (Filename.concat dst hdr.Tar.Header.file_name) + [ O_WRONLY ; O_CREAT ]) hdr.Tar.Header.file_mode) in Fun.protect ~finally:(fun () -> safe_close dst) (fun () -> copy ~src_fd:fd ~dst_fd:dst (Int64.to_int hdr.Tar.Header.file_size)) - (* TODO set owner / mode / mtime etc. *) + (* TODO set owner / mode / mtime etc. *) | _ -> Error (`Msg "not yet handled") else let* _off = - Result.map_error (fun (`Unix (_off, e, f, s)) -> - `Msg (Format.sprintf "error %s in function %s %s" - (Unix.error_message e) f s)) - (seek ~off:0 fd (Int64.to_int hdr.Tar.Header.file_size)) + Result.map_error unix_err_to_msg + (seek fd (Int64.to_int hdr.Tar.Header.file_size)) in Ok () in fold f src () - (** Return the header needed for a particular file on disk *) let header_of_file ?level file = let level = Tar.Header.compatibility level in - let* stat = safe ~off:0 Unix.LargeFile.lstat file in + let* stat = safe Unix.LargeFile.lstat file in let file_mode = stat.Unix.LargeFile.st_perm in let user_id = stat.Unix.LargeFile.st_uid in let group_id = stat.Unix.LargeFile.st_gid in @@ -181,15 +173,13 @@ let append_file ?level ?header filename fd = let* _off = List.fold_left (fun acc d -> let* _off = acc in - Result.map_error map_to_msg - (safe ~off:0 (Unix.write_substring fd d 0) (String.length d))) + Result.map_error unix_err_to_msg + (safe (Unix.write_substring fd d 0) (String.length d))) (Ok 0) header_strings in let* src = - Result.map_error (fun (`Unix (_off, e, f, s)) -> - `Msg (Format.sprintf "error %s in function %s %s" - (Unix.error_message e) f s)) - (safe ~off:0 Unix.(openfile filename [ O_RDONLY ]) 0) + Result.map_error unix_err_to_msg + (safe Unix.(openfile filename [ O_RDONLY ]) 0) in (* TOCTOU [also, header may not be valid for file] *) Fun.protect ~finally:(fun () -> safe_close src) @@ -201,16 +191,16 @@ let write_global_extended_header ?level header fd = let* _off = List.fold_left (fun acc d -> let* _off = acc in - Result.map_error map_to_msg - (safe ~off:0 (Unix.write_substring fd d 0) (String.length d))) + Result.map_error unix_err_to_msg + (safe (Unix.write_substring fd d 0) (String.length d))) (Ok 0) header_strings in Ok () let write_end fd = let* _written = - Result.map_error map_to_msg - (safe ~off:0 + Result.map_error unix_err_to_msg + (safe (Unix.write_substring fd (Tar.Header.zero_block ^ Tar.Header.zero_block) 0) (Tar.Header.length + Tar.Header.length)) in @@ -218,9 +208,8 @@ let write_end fd = let create ?level ?global ?(filter = fun _ -> true) ~src dst = let* dst_fd = - Result.map_error map_to_msg - (safe ~off:0 Unix.(openfile dst [ O_WRONLY ; O_CREAT ]) - 0o644) + Result.map_error unix_err_to_msg + (safe Unix.(openfile dst [ O_WRONLY ; O_CREAT ]) 0o644) in Fun.protect ~finally:(fun () -> safe_close dst_fd) (fun () -> @@ -230,12 +219,12 @@ let create ?level ?global ?(filter = fun _ -> true) ~src dst = write_global_extended_header ?level hdr dst_fd in let rec copy_files directory = - let* dir = safe ~off:0 Unix.opendir directory in + let* dir = safe Unix.opendir directory in Fun.protect ~finally:(fun () -> try Unix.closedir dir with _ -> ()) (fun () -> let rec next () = try - let* name = safe ~off:0 Unix.readdir dir in + let* name = safe Unix.readdir dir in let filename = Filename.concat directory name in let* header = header_of_file ?level filename in if filter header then diff --git a/unix/tar_unix.mli b/unix/tar_unix.mli index 996cfc2..357efd3 100644 --- a/unix/tar_unix.mli +++ b/unix/tar_unix.mli @@ -16,13 +16,11 @@ (** Unix I/O for tar-formatted data. *) -(* TODO provide a type error and a pretty-printer *) - type decode_error = [ - | `Fatal of int * [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] - | `Unix of int * Unix.error * string * string - | `Unexpected_end_of_file of int - | `Msg of int * string + | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] + | `Unix of Unix.error * string * string + | `Unexpected_end_of_file + | `Msg of string ] val pp_decode_error : Format.formatter -> decode_error -> unit @@ -31,8 +29,8 @@ val pp_decode_error : Format.formatter -> decode_error -> unit for each [hdr : Tar.Header.t]. It should forward the position in the file descriptor by [hdr.Tar.Header.file_size]. *) val fold : - (Unix.file_descr -> ?global:Tar.Header.Extended.t -> Tar.Header.t -> - 'a -> ('a, [ `Msg of string ]) result) -> + (Unix.file_descr -> ?global:Tar.Header.Extended.t -> Tar.Header.t -> 'a -> + ('a, decode_error) result) -> string -> 'a -> ('a, decode_error) result (** [extract ~filter ~src dst] extracts the tar archive [src] into the @@ -52,24 +50,24 @@ val create : ?level:Tar.Header.compatibility -> ?global:Tar.Header.Extended.t -> ?filter:(Tar.Header.t -> bool) -> src:string -> string -> - (unit, [ `Msg of string | `Unix of (int * Unix.error * string * string) ]) result + (unit, [ `Msg of string | `Unix of (Unix.error * string * string) ]) result (** [header_of_file ~level filename] returns the tar header of [filename]. *) val header_of_file : ?level:Tar.Header.compatibility -> string -> - (Tar.Header.t, [ `Unix of (int * Unix.error * string * string) ]) result + (Tar.Header.t, [ `Unix of (Unix.error * string * string) ]) result (** [append_file ~level ~header filename fd] appends the contents of [filename] to the tar archive [fd]. If [header] is not provided, {header_of_file} is used for constructing a header. *) val append_file : ?level:Tar.Header.compatibility -> ?header:Tar.Header.t -> string -> Unix.file_descr -> - (unit, [ `Msg of string | `Unix of (int * Unix.error * string * string) ]) result + (unit, [ `Msg of string | `Unix of (Unix.error * string * string) ]) result (** [write_global_extended_header ~level hdr fd] writes the extended header [hdr] to [fd]. *) val write_global_extended_header : ?level:Tar.Header.compatibility -> Tar.Header.Extended.t -> Unix.file_descr -> - (unit, [ `Msg of string | `Unix of (int * Unix.error * string * string) ]) result + (unit, [ `Msg of string | `Unix of (Unix.error * string * string) ]) result (** [write_end fd] writes the tar end marker to [fd]. *) val write_end : Unix.file_descr -> (unit, [ `Msg of string ]) result From 9c1c12093be0c0df338183ec16e2cd8f24039a52 Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Sun, 4 Feb 2024 01:05:00 +0100 Subject: [PATCH 09/34] lwt-unix --- unix/tar_lwt_unix.ml | 287 +++++++++++++++++++++++++++++++++++------- unix/tar_lwt_unix.mli | 64 ++++++++-- unix/tar_unix.ml | 41 ++++-- unix/tar_unix.mli | 2 +- 4 files changed, 324 insertions(+), 70 deletions(-) diff --git a/unix/tar_lwt_unix.ml b/unix/tar_lwt_unix.ml index 60cf251..98bfbf8 100644 --- a/unix/tar_lwt_unix.ml +++ b/unix/tar_lwt_unix.ml @@ -15,57 +15,252 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. *) -open Lwt.Infix +type decode_error = [ + | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] + | `Unix of Unix.error * string * string + | `Unexpected_end_of_file + | `Msg of string +] -module Io = struct - type in_channel = Lwt_unix.file_descr - type 'a io = 'a Lwt.t - let really_read fd buf = - let len = Bytes.length buf in - let rec loop idx = - if idx = len then - Lwt.return_unit - else - Lwt_unix.read fd buf idx (len - idx) >>= fun n -> - loop (n + idx) - in - loop 0 - let skip (ifd: Lwt_unix.file_descr) (n: int) = - Lwt_unix.(lseek ifd n SEEK_CUR) >|= ignore +let pp_decode_error ppf = function + | `Fatal err -> Tar.pp_error ppf err + | `Unix (err, fname, arg) -> + Format.fprintf ppf "Unix error %s (function %s, arg %s)" + (Unix.error_message err) fname arg + | `Unexpected_end_of_file -> + Format.fprintf ppf "Unexpected end of file" + | `Msg msg -> + Format.fprintf ppf "Error %s" msg + +let safe f a = + let open Lwt.Infix in + Lwt.catch + (fun () -> f a >|= fun r -> Ok r) + (function + | Unix.Unix_error (e, f, a) -> Lwt.return (Error (`Unix (e, f, a))) + | e -> Lwt.reraise e) - type out_channel = Lwt_unix.file_descr - let really_write fd buf = - let len = String.length buf in - let rec loop idx = - if idx = len then - Lwt.return_unit +let read_complete fd buf len = + let open Lwt_result.Infix in + let rec loop offset = + if offset < len then + safe (Lwt_unix.read fd buf offset) (len - offset) >>= fun read -> + if read = 0 then + Lwt.return (Error `Unexpected_end_of_file) else - Lwt_unix.write_string fd buf idx (len - idx) >>= fun n -> - loop (idx + n) - in - loop 0 -end + loop (offset + read) + else + Lwt.return (Ok ()) + in + loop 0 + +let seek fd n = + safe (Lwt_unix.lseek fd n) Unix.SEEK_CUR + +let safe_close fd = + Lwt.catch (fun () -> Lwt_unix.close fd) (fun _ -> Lwt.return_unit) + +let fold f filename init = + let open Lwt_result.Infix in + safe Lwt_unix.(openfile filename [ O_RDONLY ]) 0 >>= fun fd -> + let rec go t fd ?global ?data acc = + (match data with + | None -> + let buf = Bytes.make Tar.Header.length '\000' in + read_complete fd buf Tar.Header.length >|= fun () -> + Bytes.unsafe_to_string buf + | Some data -> + Lwt.return (Ok data)) >>= fun data -> + match Tar.decode t data with + | Ok (t, Some `Header hdr, g) -> + let global = Option.fold ~none:global ~some:(fun g -> Some g) g in + f fd ?global hdr acc >>= fun acc' -> + seek fd (Tar.Header.compute_zero_padding_length hdr) >>= fun _off -> + go t fd ?global acc' + | Ok (t, Some `Skip n, g) -> + let global = Option.fold ~none:global ~some:(fun g -> Some g) g in + seek fd n >>= fun _off -> + go t fd ?global acc + | Ok (t, Some `Read n, g) -> + let global = Option.fold ~none:global ~some:(fun g -> Some g) g in + let buf = Bytes.make n '\000' in + read_complete fd buf n >>= fun () -> + let data = Bytes.unsafe_to_string buf in + go t fd ?global ~data acc + | Ok (t, None, g) -> + let global = Option.fold ~none:global ~some:(fun g -> Some g) g in + go t fd ?global acc + | Error `Eof -> Lwt.return (Ok acc) + | Error `Fatal _ as e -> Lwt.return e + in + Lwt.finalize + (fun () -> go (Tar.decode_state ()) fd init) + (fun () -> safe_close fd) + +let unix_err_to_msg = function + | `Unix (e, f, s) -> + `Msg (Format.sprintf "error %s in function %s %s" + (Unix.error_message e) f s) -include Io -module HeaderReader = Tar.HeaderReader(Lwt)(Io) -module HeaderWriter = Tar.HeaderWriter(Lwt)(Io) +let copy ~src_fd ~dst_fd len = + let open Lwt_result.Infix in + let blen = 65536 in + let buffer = Bytes.make blen '\000' in + let rec read_write ~src_fd ~dst_fd len = + if len = 0 then + Lwt.return (Ok ()) + else + let l = min blen len in + Lwt_result.map_error + (function + | `Unix _ as e -> unix_err_to_msg e + | `Unexpected_end_of_file -> + `Msg "Unexpected end of file") + (read_complete src_fd buffer l) >>= fun () -> + Lwt_result.map_error unix_err_to_msg + (safe (Lwt_unix.write dst_fd buffer 0) l) >>= fun _written -> + read_write ~src_fd ~dst_fd (len - l) + in + read_write ~src_fd ~dst_fd len + +let extract ?(filter = fun _ -> true) ~src dst = + let open Lwt_result.Infix in + let f fd ?global:_ hdr () = + if filter hdr then + match hdr.Tar.Header.link_indicator with + | Tar.Header.Link.Normal -> + Lwt_result.map_error unix_err_to_msg + (safe Lwt_unix.(openfile (Filename.concat dst hdr.Tar.Header.file_name) + [ O_WRONLY ; O_CREAT ]) hdr.Tar.Header.file_mode) >>= fun dst -> + Lwt.finalize + (fun () -> copy ~src_fd:fd ~dst_fd:dst (Int64.to_int hdr.Tar.Header.file_size)) + (fun () -> safe_close dst) + (* TODO set owner / mode / mtime etc. *) + | _ -> + (* TODO handle directories, links, etc. *) + Lwt_result.map_error unix_err_to_msg + (seek fd (Int64.to_int hdr.Tar.Header.file_size)) >|= fun _off -> + () + else + Lwt_result.map_error unix_err_to_msg + (seek fd (Int64.to_int hdr.Tar.Header.file_size)) >|= fun _off -> + () + in + fold f src () (** Return the header needed for a particular file on disk *) -let header_of_file ?level (file: string) : Tar.Header.t Lwt.t = +let header_of_file ?level file = + let open Lwt_result.Infix in let level = Tar.Header.compatibility level in - Lwt_unix.LargeFile.stat file >>= fun stat -> - Lwt_unix.getpwuid stat.Lwt_unix.LargeFile.st_uid >>= fun pwent -> - Lwt_unix.getgrgid stat.Lwt_unix.LargeFile.st_gid >>= fun grent -> - let file_mode = stat.Lwt_unix.LargeFile.st_perm in - let user_id = stat.Lwt_unix.LargeFile.st_uid in - let group_id = stat.Lwt_unix.LargeFile.st_gid in - let file_size = stat.Lwt_unix.LargeFile.st_size in - let mod_time = Int64.of_float stat.Lwt_unix.LargeFile.st_mtime in + safe Lwt_unix.LargeFile.stat file >>= fun stat -> + let file_mode = stat.Lwt_unix.LargeFile.st_perm in + let user_id = stat.Lwt_unix.LargeFile.st_uid in + let group_id = stat.Lwt_unix.LargeFile.st_gid in + let file_size = stat.Lwt_unix.LargeFile.st_size in + let mod_time = Int64.of_float stat.Lwt_unix.LargeFile.st_mtime in let link_indicator = Tar.Header.Link.Normal in - let link_name = "" in - let uname = if level = V7 then "" else pwent.Lwt_unix.pw_name in - let gname = if level = V7 then "" else grent.Lwt_unix.gr_name in - let devmajor = if level = Ustar then stat.Lwt_unix.LargeFile.st_dev else 0 in - let devminor = if level = Ustar then stat.Lwt_unix.LargeFile.st_rdev else 0 in - Lwt.return (Tar.Header.make ~file_mode ~user_id ~group_id ~mod_time ~link_indicator ~link_name - ~uname ~gname ~devmajor ~devminor file file_size) + let link_name = "" in + (if level = V7 then + Lwt.return (Ok "") + else + Lwt.catch + (fun () -> safe Lwt_unix.getpwuid stat.Lwt_unix.LargeFile.st_uid) + (function + | Not_found -> + Lwt.return (Error (`Msg ("No user entry found for UID"))) + | e -> Lwt.reraise e) >|= fun pwent -> + pwent.Lwt_unix.pw_name) >>= fun uname -> + (if level = V7 then + Lwt.return (Ok "") + else + Lwt.catch + (fun () -> safe Lwt_unix.getgrgid stat.Lwt_unix.LargeFile.st_gid) + (function + | Not_found -> + Lwt.return (Error (`Msg ("No group entry found for GID"))) + | e -> Lwt.reraise e) >|= fun grent -> + grent.Lwt_unix.gr_name) >>= fun gname -> + let devmajor = if level = Ustar then stat.Lwt_unix.LargeFile.st_dev else 0 in + let devminor = if level = Ustar then stat.Lwt_unix.LargeFile.st_rdev else 0 in + let hdr = Tar.Header.make ~file_mode ~user_id ~group_id ~mod_time ~link_indicator ~link_name + ~uname ~gname ~devmajor ~devminor file file_size + in + Lwt.return (Ok hdr) + +let append_file ?level ?header filename fd = + let open Lwt_result.Infix in + (match header with + | None -> header_of_file ?level filename + | Some x -> Lwt.return (Ok x)) >>= fun header -> + Lwt_result.lift (Tar.encode_header ?level header) >>= fun header_strings -> + Lwt_list.fold_left_s (fun acc d -> + Lwt_result.lift acc >>= fun _written -> + Lwt_result.map_error unix_err_to_msg + (safe (Lwt_unix.write_string fd d 0) (String.length d))) + (Ok 0) header_strings >>= fun _written -> + Lwt_result.map_error unix_err_to_msg + (safe Lwt_unix.(openfile filename [ O_RDONLY ]) 0) >>= fun src -> + (* TOCTOU [also, header may not be valid for file] *) + Lwt.finalize + (fun () -> copy ~src_fd:src ~dst_fd:fd + (Int64.to_int header.Tar.Header.file_size)) + (fun () -> safe_close src) + +let write_global_extended_header ?level header fd = + let open Lwt_result.Infix in + Lwt_result.lift (Tar.encode_global_extended_header ?level header) >>= fun header_strings -> + Lwt_list.fold_left_s (fun acc d -> + Lwt_result.lift acc >>= fun _written -> + Lwt_result.map_error unix_err_to_msg + (safe (Lwt_unix.write_string fd d 0) (String.length d))) + (Ok 0) header_strings >|= fun _written -> + () + +let write_end fd = + let open Lwt_result.Infix in + Lwt_result.map_error unix_err_to_msg + (safe + (Lwt_unix.write_string fd (Tar.Header.zero_block ^ Tar.Header.zero_block) 0) + (Tar.Header.length + Tar.Header.length)) >|= fun _written -> + () + +let create ?level ?global ?(filter = fun _ -> true) ~src dst = + let open Lwt_result.Infix in + Lwt_result.map_error unix_err_to_msg + (safe Lwt_unix.(openfile dst [ O_WRONLY ; O_CREAT ]) 0o644) >>= fun dst_fd -> + Lwt.finalize + (fun () -> + (match global with + | None -> Lwt.return (Ok ()) + | Some hdr -> write_global_extended_header ?level hdr dst_fd) >>= fun () -> + let rec copy_files directory = + safe Lwt_unix.opendir directory >>= fun dir -> + Lwt.finalize + (fun () -> + let rec next () = + try + safe Lwt_unix.readdir dir >>= fun name -> + let filename = Filename.concat directory name in + header_of_file ?level filename >>= fun header -> + if filter header then + match header.Tar.Header.link_indicator with + | Normal -> + append_file ?level ~header filename dst_fd >>= fun () -> + next () + | Directory -> + (* TODO first finish curdir (and close the dir fd), then go deeper *) + copy_files filename >>= fun () -> + next () + | _ -> Lwt.return (Ok ()) (* NYI *) + else Lwt.return (Ok ()) + with End_of_file -> Lwt.return (Ok ()) + in + next ()) + (fun () -> + Lwt.catch + (fun () -> Lwt_unix.closedir dir) + (fun _ -> Lwt.return_unit)) + in + copy_files src >>= fun () -> + write_end dst_fd) + (fun () -> safe_close dst_fd) diff --git a/unix/tar_lwt_unix.mli b/unix/tar_lwt_unix.mli index 9b97e4d..a4c3d47 100644 --- a/unix/tar_lwt_unix.mli +++ b/unix/tar_lwt_unix.mli @@ -16,20 +16,58 @@ (** Lwt_unix I/O for tar-formatted data *) -val really_read: Lwt_unix.file_descr -> bytes -> unit Lwt.t -(** [really_read fd buf] fills [buf] with data from [fd] or fails - with {!Stdlib.End_of_file}. *) +type decode_error = [ + | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] + | `Unix of Unix.error * string * string + | `Unexpected_end_of_file + | `Msg of string +] -val really_write: Lwt_unix.file_descr -> string -> unit Lwt.t -(** [really_write fd buf] writes the full contents of [buf] to - [fd] or fails with {!Stdlib.End_of_file}. *) +val pp_decode_error : Format.formatter -> decode_error -> unit -val skip : Lwt_unix.file_descr -> int -> unit Lwt.t -(** [skip fd n] reads [n] bytes from [fd] and discards them. If possible, you - should use [Lwt_unix.lseek fd n Lwt_unix.SEEK_CUR] instead. *) +(** [fold f filename acc] folds over the tar archive. The function [f] is called + for each [hdr : Tar.Header.t]. It should forward the position in the file + descriptor by [hdr.Tar.Header.file_size]. *) +val fold : + (Lwt_unix.file_descr -> ?global:Tar.Header.Extended.t -> Tar.Header.t -> 'a -> + ('a, decode_error) result Lwt.t) -> + string -> 'a -> ('a, decode_error) result Lwt.t -(** Return the header needed for a particular file on disk. *) -val header_of_file : ?level:Tar.Header.compatibility -> string -> Tar.Header.t Lwt.t +(** [extract ~filter ~src dst] extracts the tar archive [src] into the + directory [dst]. If [dst] does not exist, it is created. If [filter] is + provided (defaults to [fun _ -> true]), any file where [filter hdr] returns + [false], is skipped. *) +val extract : + ?filter:(Tar.Header.t -> bool) -> + src:string -> string -> + (unit, decode_error) result Lwt.t -module HeaderReader : Tar.HEADERREADER with type in_channel = Lwt_unix.file_descr and type 'a io = 'a Lwt.t -module HeaderWriter : Tar.HEADERWRITER with type out_channel = Lwt_unix.file_descr and type 'a io = 'a Lwt.t +(** [create ~level ~filter ~src dst] creates a tar archive at [dst]. It uses + [src], a directory name, as input. If [filter] is provided + (defaults to [fun _ -> true]), any file where [filter hdr] returns [false] + is skipped. *) +val create : ?level:Tar.Header.compatibility -> + ?global:Tar.Header.Extended.t -> + ?filter:(Tar.Header.t -> bool) -> + src:string -> string -> + (unit, [ `Msg of string | `Unix of (Unix.error * string * string) ]) result Lwt.t + +(** [header_of_file ~level filename] returns the tar header of [filename]. *) +val header_of_file : ?level:Tar.Header.compatibility -> string -> + (Tar.Header.t, [ `Msg of string | `Unix of (Unix.error * string * string) ]) result Lwt.t + +(** [append_file ~level ~header filename fd] appends the contents of [filename] + to the tar archive [fd]. If [header] is not provided, {header_of_file} is + used for constructing a header. *) +val append_file : ?level:Tar.Header.compatibility -> ?header:Tar.Header.t -> + string -> Lwt_unix.file_descr -> + (unit, [ `Msg of string | `Unix of (Unix.error * string * string) ]) result Lwt.t + +(** [write_global_extended_header ~level hdr fd] writes the extended header [hdr] to + [fd]. *) +val write_global_extended_header : ?level:Tar.Header.compatibility -> + Tar.Header.Extended.t -> Lwt_unix.file_descr -> + (unit, [ `Msg of string | `Unix of (Unix.error * string * string) ]) result Lwt.t + +(** [write_end fd] writes the tar end marker to [fd]. *) +val write_end : Lwt_unix.file_descr -> (unit, [ `Msg of string ]) result Lwt.t diff --git a/unix/tar_unix.ml b/unix/tar_unix.ml index 340462e..394fd60 100644 --- a/unix/tar_unix.ml +++ b/unix/tar_unix.ml @@ -28,7 +28,7 @@ let safe_close fd = let read_complete fd buf len = let rec loop offset = if offset < len then - let* n = safe (Unix.read fd buf offset) (len - offset) in + let* n = safe (Unix.read fd buf offset) (len - offset) in if n = 0 then Error `Unexpected_end_of_file else @@ -136,7 +136,13 @@ let extract ?(filter = fun _ -> true) ~src dst = Fun.protect ~finally:(fun () -> safe_close dst) (fun () -> copy ~src_fd:fd ~dst_fd:dst (Int64.to_int hdr.Tar.Header.file_size)) (* TODO set owner / mode / mtime etc. *) - | _ -> Error (`Msg "not yet handled") + | _ -> + (* TODO handle directories, links, etc. *) + let* _off = + Result.map_error unix_err_to_msg + (seek fd (Int64.to_int hdr.Tar.Header.file_size)) + in + Ok () else let* _off = Result.map_error unix_err_to_msg @@ -157,9 +163,25 @@ let header_of_file ?level file = (* TODO evaluate stat.st_kind *) let link_indicator = Tar.Header.Link.Normal in let link_name = "" in - let uname = if level = V7 then "" else (Unix.getpwuid stat.Unix.LargeFile.st_uid).Unix.pw_name in + let* uname = + if level = V7 then + Ok "" + else + try + let* passwd_entry = safe Unix.getpwuid stat.Unix.LargeFile.st_uid in + Ok passwd_entry.Unix.pw_name + with Not_found -> Error (`Msg ("No user entry found for UID")) + in let devmajor = if level = Ustar then stat.Unix.LargeFile.st_dev else 0 in - let gname = if level = V7 then "" else (Unix.getgrgid stat.Unix.LargeFile.st_gid).Unix.gr_name in + let* gname = + if level = V7 then + Ok "" + else + try + let* passwd_entry = safe Unix.getgrgid stat.Unix.LargeFile.st_gid in + Ok passwd_entry.Unix.gr_name + with Not_found -> Error (`Msg "No group entry found for GID") + in let devminor = if level = Ustar then stat.Unix.LargeFile.st_rdev else 0 in Ok (Tar.Header.make ~file_mode ~user_id ~group_id ~mod_time ~link_indicator ~link_name ~uname ~gname ~devmajor ~devminor file stat.Unix.LargeFile.st_size) @@ -170,9 +192,9 @@ let append_file ?level ?header filename fd = | Some x -> Ok x in let* header_strings = Tar.encode_header ?level header in - let* _off = + let* _written = List.fold_left (fun acc d -> - let* _off = acc in + let* _written = acc in Result.map_error unix_err_to_msg (safe (Unix.write_substring fd d 0) (String.length d))) (Ok 0) header_strings @@ -188,9 +210,9 @@ let append_file ?level ?header filename fd = let write_global_extended_header ?level header fd = let* header_strings = Tar.encode_global_extended_header ?level header in - let* _off = + let* _written = List.fold_left (fun acc d -> - let* _off = acc in + let* _written = acc in Result.map_error unix_err_to_msg (safe (Unix.write_substring fd d 0) (String.length d))) (Ok 0) header_strings @@ -215,8 +237,7 @@ let create ?level ?global ?(filter = fun _ -> true) ~src dst = (fun () -> let* () = match global with | None -> Ok () - | Some hdr -> - write_global_extended_header ?level hdr dst_fd + | Some hdr -> write_global_extended_header ?level hdr dst_fd in let rec copy_files directory = let* dir = safe Unix.opendir directory in diff --git a/unix/tar_unix.mli b/unix/tar_unix.mli index 357efd3..58423e5 100644 --- a/unix/tar_unix.mli +++ b/unix/tar_unix.mli @@ -54,7 +54,7 @@ val create : ?level:Tar.Header.compatibility -> (** [header_of_file ~level filename] returns the tar header of [filename]. *) val header_of_file : ?level:Tar.Header.compatibility -> string -> - (Tar.Header.t, [ `Unix of (Unix.error * string * string) ]) result + (Tar.Header.t, [ `Msg of string | `Unix of (Unix.error * string * string) ]) result (** [append_file ~level ~header filename fd] appends the contents of [filename] to the tar archive [fd]. If [header] is not provided, {header_of_file} is From 29d884e8d90f9af05c3f21d686d7530ed41f7cc8 Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Sun, 4 Feb 2024 01:37:29 +0100 Subject: [PATCH 10/34] further work, get tests a bit more up to speed --- bin/otar.ml | 3 ++- lib/tar_gz.ml | 3 ++- lib/tar_gz.mli | 3 ++- lib_test/dune | 4 ++-- lib_test/parse_test.ml | 14 +++++++++----- unix/tar_lwt_unix.ml | 35 +++++++++++++++++------------------ unix/tar_lwt_unix.mli | 4 ++++ unix/tar_unix.ml | 38 ++++++++++++++++---------------------- unix/tar_unix.mli | 4 ++++ 9 files changed, 58 insertions(+), 50 deletions(-) diff --git a/bin/otar.ml b/bin/otar.ml index 33de886..75fedd5 100644 --- a/bin/otar.ml +++ b/bin/otar.ml @@ -13,7 +13,7 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. *) - +(* let () = Printexc.record_backtrace true module Tar_gz = Tar_gz.Make @@ -129,3 +129,4 @@ let () = match Sys.argv with | _ -> let cmd = Filename.basename Sys.argv.(0) in Format.eprintf "%s []\n%s list \n" cmd cmd +*) diff --git a/lib/tar_gz.ml b/lib/tar_gz.ml index e197482..c64e20c 100644 --- a/lib/tar_gz.ml +++ b/lib/tar_gz.ml @@ -1,4 +1,4 @@ -(* +(*(* * Copyright (C) 2022 Romain Calascibetta * * Permission to use, copy, modify, and distribute this software for any @@ -230,3 +230,4 @@ module Make | `End _gz -> Async.return () in until_end (Gz.Def.src state.gz De.bigstring_empty 0 0) end +*) diff --git a/lib/tar_gz.mli b/lib/tar_gz.mli index de18b76..3626a08 100644 --- a/lib/tar_gz.mli +++ b/lib/tar_gz.mli @@ -1,4 +1,4 @@ -(* +(*(* * Copyright (C) 2022 Romain Calascibetta * * Permission to use, copy, modify, and distribute this software for any @@ -72,3 +72,4 @@ module Make module HeaderWriter : Tar.HEADERWRITER with type out_channel = out_channel and type 'a io = 'a Async.t end + *) diff --git a/lib_test/dune b/lib_test/dune index e355bb1..439f9fc 100644 --- a/lib_test/dune +++ b/lib_test/dune @@ -1,6 +1,6 @@ (tests (names parse_test write_test allocate_set_partial_test global_extended_headers_test) - (package tar-mirage) + (package tar-unix) (libraries mirage-block-unix mirage-block @@ -9,4 +9,4 @@ alcotest-lwt lwt tar-unix - tar-mirage)) +)) diff --git a/lib_test/parse_test.ml b/lib_test/parse_test.ml index 3570e9f..2c82f26 100644 --- a/lib_test/parse_test.ml +++ b/lib_test/parse_test.ml @@ -170,13 +170,18 @@ let can_write_pax () = let fd = Unix.openfile filename [ O_CREAT; O_WRONLY; O_CLOEXEC ] 0o0644 in Fun.protect (fun () -> - let hdr = Tar.Header.make ~user_id "test" 0L in - match Tar_unix.HeaderWriter.write hdr fd with + let header = Tar.Header.make ~user_id "test" 0L in + match Tar_unix.write_header header fd with | Ok () -> - Tar_unix.really_write fd Tar.Header.zero_block; - Tar_unix.really_write fd Tar.Header.zero_block; + (match Tar_unix.write_end fd with + | Ok () -> () + | Error `Msg msg -> + Alcotest.failf "error writing end %s" msg) | Error `Msg msg -> Alcotest.failf "error writing header %s" msg + | Error `Unix (e, f, a) -> + Alcotest.failf "error writing header - unix error %s %s %s" + (Unix.error_message e) f a ) ~finally:(fun () -> Unix.close fd); (* Read it back and verify the header was read *) let fd = Unix.openfile filename [ O_RDONLY; O_CLOEXEC ] 0 in @@ -262,7 +267,6 @@ let can_list_longlink_implicit_dir () = | Error `Fatal e -> Alcotest.failf "unexpected error: %a" Tar.pp_error e | Error `Eof -> Alcotest.fail "unexpected end of file") - let starts_with ~prefix s = let len_s = String.length s and len_pre = String.length prefix in diff --git a/unix/tar_lwt_unix.ml b/unix/tar_lwt_unix.ml index 98bfbf8..c0ff4a2 100644 --- a/unix/tar_lwt_unix.ml +++ b/unix/tar_lwt_unix.ml @@ -187,17 +187,26 @@ let header_of_file ?level file = in Lwt.return (Ok hdr) -let append_file ?level ?header filename fd = +let write_strings fd datas = let open Lwt_result.Infix in - (match header with - | None -> header_of_file ?level filename - | Some x -> Lwt.return (Ok x)) >>= fun header -> - Lwt_result.lift (Tar.encode_header ?level header) >>= fun header_strings -> Lwt_list.fold_left_s (fun acc d -> Lwt_result.lift acc >>= fun _written -> Lwt_result.map_error unix_err_to_msg (safe (Lwt_unix.write_string fd d 0) (String.length d))) - (Ok 0) header_strings >>= fun _written -> + (Ok 0) datas >|= fun _written -> + () + +let write_header ?level header fd = + let open Lwt_result.Infix in + Lwt_result.lift (Tar.encode_header ?level header) >>= fun header_strings -> + write_strings fd header_strings + +let append_file ?level ?header filename fd = + let open Lwt_result.Infix in + (match header with + | None -> header_of_file ?level filename + | Some x -> Lwt.return (Ok x)) >>= fun header -> + write_header ?level header fd >>= fun () -> Lwt_result.map_error unix_err_to_msg (safe Lwt_unix.(openfile filename [ O_RDONLY ]) 0) >>= fun src -> (* TOCTOU [also, header may not be valid for file] *) @@ -209,20 +218,10 @@ let append_file ?level ?header filename fd = let write_global_extended_header ?level header fd = let open Lwt_result.Infix in Lwt_result.lift (Tar.encode_global_extended_header ?level header) >>= fun header_strings -> - Lwt_list.fold_left_s (fun acc d -> - Lwt_result.lift acc >>= fun _written -> - Lwt_result.map_error unix_err_to_msg - (safe (Lwt_unix.write_string fd d 0) (String.length d))) - (Ok 0) header_strings >|= fun _written -> - () + write_strings fd header_strings let write_end fd = - let open Lwt_result.Infix in - Lwt_result.map_error unix_err_to_msg - (safe - (Lwt_unix.write_string fd (Tar.Header.zero_block ^ Tar.Header.zero_block) 0) - (Tar.Header.length + Tar.Header.length)) >|= fun _written -> - () + write_strings fd [ Tar.Header.zero_block ; Tar.Header.zero_block ] let create ?level ?global ?(filter = fun _ -> true) ~src dst = let open Lwt_result.Infix in diff --git a/unix/tar_lwt_unix.mli b/unix/tar_lwt_unix.mli index a4c3d47..cc8ae47 100644 --- a/unix/tar_lwt_unix.mli +++ b/unix/tar_lwt_unix.mli @@ -63,6 +63,10 @@ val append_file : ?level:Tar.Header.compatibility -> ?header:Tar.Header.t -> string -> Lwt_unix.file_descr -> (unit, [ `Msg of string | `Unix of (Unix.error * string * string) ]) result Lwt.t +val write_header : ?level:Tar.Header.compatibility -> + Tar.Header.t -> Lwt_unix.file_descr -> + (unit, [ `Msg of string | `Unix of (Unix.error * string * string) ]) result Lwt.t + (** [write_global_extended_header ~level hdr fd] writes the extended header [hdr] to [fd]. *) val write_global_extended_header : ?level:Tar.Header.compatibility -> diff --git a/unix/tar_unix.ml b/unix/tar_unix.ml index 394fd60..b4c04db 100644 --- a/unix/tar_unix.ml +++ b/unix/tar_unix.ml @@ -186,19 +186,26 @@ let header_of_file ?level file = Ok (Tar.Header.make ~file_mode ~user_id ~group_id ~mod_time ~link_indicator ~link_name ~uname ~gname ~devmajor ~devminor file stat.Unix.LargeFile.st_size) -let append_file ?level ?header filename fd = - let* header = match header with - | None -> header_of_file ?level filename - | Some x -> Ok x - in - let* header_strings = Tar.encode_header ?level header in +let write_strings fd datas = let* _written = List.fold_left (fun acc d -> let* _written = acc in Result.map_error unix_err_to_msg (safe (Unix.write_substring fd d 0) (String.length d))) - (Ok 0) header_strings + (Ok 0) datas + in + Ok () + +let write_header ?level header fd = + let* header_strings = Tar.encode_header ?level header in + write_strings fd header_strings + +let append_file ?level ?header filename fd = + let* header = match header with + | None -> header_of_file ?level filename + | Some x -> Ok x in + let* () = write_header ?level header fd in let* src = Result.map_error unix_err_to_msg (safe Unix.(openfile filename [ O_RDONLY ]) 0) @@ -210,23 +217,10 @@ let append_file ?level ?header filename fd = let write_global_extended_header ?level header fd = let* header_strings = Tar.encode_global_extended_header ?level header in - let* _written = - List.fold_left (fun acc d -> - let* _written = acc in - Result.map_error unix_err_to_msg - (safe (Unix.write_substring fd d 0) (String.length d))) - (Ok 0) header_strings - in - Ok () + write_strings fd header_strings let write_end fd = - let* _written = - Result.map_error unix_err_to_msg - (safe - (Unix.write_substring fd (Tar.Header.zero_block ^ Tar.Header.zero_block) 0) - (Tar.Header.length + Tar.Header.length)) - in - Ok () + write_strings fd [ Tar.Header.zero_block ; Tar.Header.zero_block ] let create ?level ?global ?(filter = fun _ -> true) ~src dst = let* dst_fd = diff --git a/unix/tar_unix.mli b/unix/tar_unix.mli index 58423e5..b8247b0 100644 --- a/unix/tar_unix.mli +++ b/unix/tar_unix.mli @@ -63,6 +63,10 @@ val append_file : ?level:Tar.Header.compatibility -> ?header:Tar.Header.t -> string -> Unix.file_descr -> (unit, [ `Msg of string | `Unix of (Unix.error * string * string) ]) result +val write_header : ?level:Tar.Header.compatibility -> + Tar.Header.t -> Unix.file_descr -> + (unit, [ `Msg of string | `Unix of (Unix.error * string * string) ]) result + (** [write_global_extended_header ~level hdr fd] writes the extended header [hdr] to [fd]. *) val write_global_extended_header : ?level:Tar.Header.compatibility -> From 281883bef43347f575efe21409f7a346304c7ea7 Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Sun, 4 Feb 2024 13:00:24 +0100 Subject: [PATCH 11/34] more tests are working now --- lib_test/dune | 3 +- lib_test/global_extended_headers_test.ml | 163 ++++++---------- lib_test/parse_test.ml | 34 ++-- mirage/tar_mirage.ml | 229 ++++++++++++----------- unix/tar_unix.mli | 2 +- 5 files changed, 194 insertions(+), 237 deletions(-) diff --git a/lib_test/dune b/lib_test/dune index 439f9fc..79ed943 100644 --- a/lib_test/dune +++ b/lib_test/dune @@ -1,6 +1,6 @@ (tests (names parse_test write_test allocate_set_partial_test global_extended_headers_test) - (package tar-unix) + (package tar-mirage) (libraries mirage-block-unix mirage-block @@ -9,4 +9,5 @@ alcotest-lwt lwt tar-unix + tar-mirage )) diff --git a/lib_test/global_extended_headers_test.ml b/lib_test/global_extended_headers_test.ml index a5ae6de..c130382 100644 --- a/lib_test/global_extended_headers_test.ml +++ b/lib_test/global_extended_headers_test.ml @@ -1,37 +1,5 @@ let level = Tar.Header.Ustar -module Writer = struct - type out_channel = Stdlib.out_channel - type 'a io = 'a - let really_write oc str = - output_string oc str -end - -module HW = Tar.HeaderWriter - (struct type 'a t = 'a - let ( >>= ) x f = f x - let return x = x end) - (Writer) - -module Reader = struct - type in_channel = Stdlib.in_channel - type 'a io = 'a - let really_read ic buf = - really_input ic buf 0 (Bytes.length buf) - let skip ic len = - let cur = pos_in ic in - seek_in ic (cur + len) - let read ic buf = - let max = Bytes.length buf in - input ic buf 0 max -end - -module HR = Tar.HeaderReader - (struct type 'a t = 'a - let ( >>= ) x f = f x - let return x = x end) - (Reader) - let make_extended user_id = Tar.Header.Extended.make ~user_id () @@ -41,92 +9,67 @@ let make_file = let name = "file" ^ string_of_int !gen in incr gen; let hdr = Tar.Header.make name 0L in - hdr, fun cout -> - Tar.Header.zero_padding hdr - |> output_string cout + hdr + +let ( let* ) = Result.bind (* Tests that global and per-file extended headers correctly override each other. *) let use_global_extended_headers _test_ctxt = (* Write an archive using global and per-file pax extended headers *) begin try Sys.remove "test.tar" with _ -> () end; - let cout = open_out_bin "test.tar" in + let cout = Unix.openfile "test.tar" [ Unix.O_CREAT ; Unix.O_WRONLY ] 0o644 in let g0 = make_extended 1000 in - let hdr, f = make_file () in - match HW.write_global_extended_header g0 cout with - | Error `Msg msg -> Alcotest.failf "failed to write header %s" msg + let g1 = make_extended 3000 in + match + Fun.protect ~finally:(fun () -> Unix.close cout) + (fun () -> + let* () = Tar_unix.write_global_extended_header ~level g0 cout in + let hdr = make_file () in + let* () = Tar_unix.write_header ~level hdr cout in + let hdr = make_file () in + let hdr = { hdr with Tar.Header.extended = Some (make_extended 2000) } in + let* () = Tar_unix.write_header ~level hdr cout in + let hdr = make_file () in + let* () = Tar_unix.write_header ~level hdr cout in + let hdr = make_file () in + let* () = Tar_unix.write_global_extended_header ~level g1 cout in + let* () = Tar_unix.write_header ~level hdr cout in + Tar_unix.write_end cout) + with + | Error `Msg msg -> Alcotest.failf "failed to write something: %s" msg + | Error `Unix (err, f, a) -> + Alcotest.failf "failed to write: unix error %s %s %s" (Unix.error_message err) f a | Ok () -> - match HW.write ~level hdr cout with - | Error `Msg msg -> Alcotest.failf "failed to write header %s" msg - | Ok () -> - f cout; - let hdr, f = make_file () in - let hdr = { hdr with Tar.Header.extended = Some (make_extended 2000) } in - match HW.write ~level hdr cout with - | Error `Msg msg -> Alcotest.failf "failed to write header %s" msg - | Ok () -> - f cout; - let hdr, f = make_file () in - match HW.write ~level hdr cout with - | Error `Msg msg -> Alcotest.failf "failed to write header %s" msg - | Ok () -> - f cout; - let g1 = make_extended 3000 in - let hdr, f = make_file () in - match HW.write_global_extended_header g1 cout with - | Error `Msg msg -> Alcotest.failf "failed to write header %s" msg - | Ok () -> - match HW.write ~level hdr cout with - | Error `Msg msg -> Alcotest.failf "failed to write header %s" msg - | Ok () -> - f cout; - Writer.really_write cout Tar.Header.zero_block; - Writer.really_write cout Tar.Header.zero_block; - close_out cout; - (* Read the same archive, testing that headers have been squashed. *) - let cin = open_in_bin "test.tar" in - let global = ref None in - let header = - let pp ppf hdr = Fmt.pf ppf "%s" (Tar.Header.Extended.to_detailed_string hdr) in - Alcotest.testable (fun ppf hdr -> Fmt.pf ppf "%a" Fmt.(option pp) hdr) ( = ) - in - ( match HR.read ~global:!global cin with - | Ok (hdr, global') -> - Alcotest.check header "expected global header" (Some g0) global'; - global := global'; - Alcotest.(check int) "expected user" 1000 hdr.Tar.Header.user_id; - let to_skip = Tar.Header.(Int64.to_int (to_sectors hdr) * length) in - Reader.skip cin to_skip; - | Error `Eof -> failwith "Couldn't read header, end of file" - | Error (`Fatal err) -> Fmt.failwith "Couldn't read header: %a" Tar.pp_error err ); - ( match HR.read ~global:!global cin with - | Ok (hdr, global') -> - Alcotest.check header "expected global header" (Some g0) global'; - global := global'; - Alcotest.(check int) "expected user" 2000 hdr.Tar.Header.user_id; - let to_skip = Tar.Header.(Int64.to_int (to_sectors hdr) * length) in - Reader.skip cin to_skip; - | Error _ -> failwith "Couldn't read header" ); - ( match HR.read ~global:!global cin with - | Ok (hdr, global') -> - Alcotest.check header "expected global header" (Some g0) global'; - global := global'; - Alcotest.(check int) "expected user" 1000 hdr.Tar.Header.user_id; - let to_skip = Tar.Header.(Int64.to_int (to_sectors hdr) * length) in - Reader.skip cin to_skip; - | Error _ -> failwith "Couldn't read header" ); - ( match HR.read ~global:!global cin with - | Ok (hdr, global') -> - Alcotest.check header "expected global header" (Some g1) global'; - global := global'; - Alcotest.(check int) "expected user" 3000 hdr.Tar.Header.user_id; - let to_skip = Tar.Header.(Int64.to_int (to_sectors hdr) * length) in - Reader.skip cin to_skip; - | Error _ -> failwith "Couldn't read header" ); - ( match HR.read ~global:!global cin with - | Error `Eof -> () - | _ -> failwith "Should have found EOF"); - () + (* Read the same archive, testing that headers have been squashed. *) + let header = + let pp ppf hdr = Fmt.pf ppf "%s" (Tar.Header.Extended.to_detailed_string hdr) in + Alcotest.testable (fun ppf hdr -> Fmt.pf ppf "%a" Fmt.(option pp) hdr) ( = ) + in + let f _fd ?global hdr idx = + match idx with + | 0 -> + Alcotest.check header "expected global header" (Some g0) global; + Alcotest.(check int) "expected user" 1000 hdr.Tar.Header.user_id; + Ok 1 + | 1 -> + Alcotest.check header "expected global header" (Some g0) global; + Alcotest.(check int) "expected user" 2000 hdr.Tar.Header.user_id; + Ok 2 + | 2 -> + Alcotest.check header "expected global header" (Some g0) global; + Alcotest.(check int) "expected user" 1000 hdr.Tar.Header.user_id; + Ok 3 + | 3 -> + Alcotest.check header "expected global header" (Some g1) global; + Alcotest.(check int) "expected user" 3000 hdr.Tar.Header.user_id; + Ok 4 + | _ -> Alcotest.fail "too many headers" + in + match Tar_unix.fold f "test.tar" 0 with + | Ok 4 -> () + | Ok n -> Alcotest.failf "early abort, expected 4, received %u" n + | Error e -> Alcotest.failf "failed to read: %a" Tar_unix.pp_decode_error e let () = let suite = "tar - pax global extended headers", [ diff --git a/lib_test/parse_test.ml b/lib_test/parse_test.ml index 2c82f26..cd17f57 100644 --- a/lib_test/parse_test.ml +++ b/lib_test/parse_test.ml @@ -235,15 +235,14 @@ let can_list_long_pax_tar () = - Reynir *) let can_list_pax_implicit_dir () = - let fd = Unix.openfile "lib_test/pax-shenanigans.tar" [ O_RDONLY; O_CLOEXEC ] 0x0 in - Fun.protect ~finally:(fun () -> Unix.close fd) - (fun () -> - match Tar_unix.HeaderReader.read ~global:None fd with - | Error `Fatal e -> Alcotest.failf "unexpected error: %a" Tar.pp_error e - | Error `Eof -> Alcotest.fail "unexpected end of file" - | Ok (hdr, _global) -> - Alcotest.(check link) "is directory" Tar.Header.Link.Directory hdr.link_indicator; - Alcotest.(check string) "filename is patched" "clearly/a/directory/" hdr.file_name) + let f _fd ?global:_ hdr () = + Alcotest.(check link) "is directory" Tar.Header.Link.Directory hdr.Tar.Header.link_indicator; + Alcotest.(check string) "filename is patched" "clearly/a/directory/" hdr.file_name; + Ok () + in + match Tar_unix.fold f "lib_test/pax-shenanigans.tar" () with + | Ok () -> () + | Error e -> Alcotest.failf "unexpected error: %a" Tar_unix.pp_decode_error e (* Sample tar generated with commit 1583f71ea33b2836d3fb996ac7dc35d55abe2777: [let buf = @@ -257,15 +256,14 @@ let can_list_pax_implicit_dir () = Tar.Header.marshal ~level (Cstruct.shift buf 1024) hdr; buf] *) let can_list_longlink_implicit_dir () = - let fd = Unix.openfile "lib_test/long-implicit-dir.tar" [ O_RDONLY; O_CLOEXEC ] 0x0 in - Fun.protect ~finally:(fun () -> Unix.close fd) - (fun () -> - match Tar_unix.HeaderReader.read ~global:None fd with - | Ok (hdr, _global) -> - Alcotest.(check link) "is directory" Tar.Header.Link.Directory hdr.link_indicator; - Alcotest.(check string) "filename is patched" "some/long/name/for/a/directory/" hdr.file_name - | Error `Fatal e -> Alcotest.failf "unexpected error: %a" Tar.pp_error e - | Error `Eof -> Alcotest.fail "unexpected end of file") + let f _fd ?global:_ hdr () = + Alcotest.(check link) "is directory" Tar.Header.Link.Directory hdr.Tar.Header.link_indicator; + Alcotest.(check string) "filename is patched" "some/long/name/for/a/directory/" hdr.file_name; + Ok () + in + match Tar_unix.fold f "lib_test/long-implicit-dir.tar" () with + | Ok () -> () + | Error e -> Alcotest.failf "unexpected error: %a" Tar_unix.pp_decode_error e let starts_with ~prefix s = let len_s = String.length s diff --git a/mirage/tar_mirage.ml b/mirage/tar_mirage.ml index 35f5b55..4e45bbc 100644 --- a/mirage/tar_mirage.ml +++ b/mirage/tar_mirage.ml @@ -75,42 +75,66 @@ module Make_KV_RO (BLOCK : Mirage_block.S) = struct in Lwt.return r - module Reader = struct - type in_channel = { - b: BLOCK.t; - (** offset in bytes *) - mutable offset: int64; - info: Mirage_block.info; - } - type 'a io = 'a Lwt.t - let really_read in_channel buffer = - let len = Bytes.length buffer in - assert(len <= 512); - (* Tar assumes 512 byte sectors, but BLOCK might have 4096 byte sectors for example *) - let sector_size = in_channel.info.Mirage_block.sector_size in - let sector' = Int64.(div in_channel.offset (of_int sector_size)) in - let sector_aligned_len = - if len mod sector_size == 0 then len else - len + (sector_size - len mod sector_size) - in - let tmp = Cstruct.create sector_aligned_len in - BLOCK.read in_channel.b sector' [ tmp ] - >>= function - | Error e -> failwith (Format.asprintf "Failed to read sector %Ld from block device: %a" sector' - BLOCK.pp_error e) - | Ok () -> - (* If the BLOCK sector size is big, then we need to select the 512 bytes we want *) - let offset = Int64.(to_int (sub in_channel.offset (mul sector' (of_int sector_size)))) in - in_channel.offset <- Int64.(add in_channel.offset (of_int len)); - Cstruct.blit_to_bytes tmp offset buffer 0 len; - Lwt.return_unit - let skip in_channel n = - in_channel.offset <- Int64.(add in_channel.offset (of_int n)); - Lwt.return_unit - let _get_current_tar_sector in_channel = Int64.div in_channel.offset 512L - - end - module HR = Tar.HeaderReader(Lwt)(Reader) + let read_data info b offset buffer len = + assert(len <= 512); + (* Tar assumes 512 byte sectors, but BLOCK might have 4096 byte sectors for example *) + let sector_size = info.Mirage_block.sector_size in + let sector' = Int64.(div offset (of_int sector_size)) in + let sector_aligned_len = + if len mod sector_size == 0 then + len + else + len + (sector_size - len mod sector_size) + in + let tmp = Cstruct.create sector_aligned_len in + BLOCK.read b sector' [ tmp ] >>= function + | Error e -> + Lwt.return (Error (`Msg + (Format.asprintf "Failed to read sector %Ld from block device: %a" sector' + BLOCK.pp_error e))) + | Ok () -> + (* If the BLOCK sector size is big, then we need to select the 512 bytes we want *) + let offset_in_cs = Int64.(to_int (sub offset (mul sector' (of_int sector_size)))) in + Cstruct.blit_to_bytes tmp offset_in_cs buffer 0 len; + Lwt.return (Ok ()) + + let fold info b f init = + let open Lwt_result.Infix in + let rec go t offset ?global ?data acc = + (match data with + | None -> + let buf = Bytes.make Tar.Header.length '\000' in + read_data info b offset buf Tar.Header.length >|= fun () -> + Int64.(add offset (of_int Tar.Header.length)), Bytes.unsafe_to_string buf + | Some data -> + Lwt.return (Ok (offset, data))) >>= fun (offset, data) -> + match Tar.decode t data with + | Ok (t, Some `Header hdr, g) -> + let global = Option.fold ~none:global ~some:(fun g -> Some g) g in + f offset ?global hdr acc >>= fun acc' -> + let off' = + Int64.(add offset (add hdr.Tar.Header.file_size + (of_int (Tar.Header.compute_zero_padding_length hdr)))) + in + go t off' ?global acc' + | Ok (t, Some `Skip n, g) -> + let global = Option.fold ~none:global ~some:(fun g -> Some g) g in + let off' = Int64.(add offset (of_int n)) in + go t off' ?global acc + | Ok (t, Some `Read n, g) -> + let global = Option.fold ~none:global ~some:(fun g -> Some g) g in + let buf = Bytes.make n '\000' in + read_data info b offset buf n >>= fun () -> + let data = Bytes.unsafe_to_string buf in + let off' = Int64.(add offset (of_int n)) in + go t off' ?global ~data acc + | Ok (t, None, g) -> + let global = Option.fold ~none:global ~some:(fun g -> Some g) g in + go t offset ?global acc + | Error `Eof -> Lwt.return (Ok acc) + | Error `Fatal _ as e -> Lwt.return e + in + go (Tar.decode_state ()) 0L init (* [read_partial_sector t sector_start ~offset ~length dst] reads a single sector and blits [length] bytes from [offset] into [dst] @@ -255,33 +279,37 @@ module Make_KV_RO (BLOCK : Mirage_block.S) = struct let ssize = info.Mirage_block.sector_size in if ssize mod 512 <> 0 || ssize < 512 then invalid_arg "Sector size needs to be >= 512 and a multiple of 512"; - let in_channel = { Reader.b; offset = 0L; info } in - let rec loop ~global map = - HR.read ~global in_channel >>= function - | Error `Eof -> Lwt.return map - | Error `Fatal e -> - Format.kasprintf failwith "Error reading archive: %a" Tar.pp_error e - | Ok (tar, global) -> - let filename = trim_slash tar.Tar.Header.file_name in - let map = - if filename = "" then - map - else - let data_tar_offset = Int64.div in_channel.Reader.offset 512L in - let v_or_d = if is_dict filename then Dict (tar, StringMap.empty) else Value (tar, data_tar_offset) in - insert map (Mirage_kv.Key.v filename) v_or_d - in - Reader.skip in_channel (Int64.to_int tar.Tar.Header.file_size) >>= fun () -> - Reader.skip in_channel (Tar.Header.compute_zero_padding_length tar) >>= fun () -> - loop ~global map + let f offset ?global:_ hdr (_, map) = + let filename = trim_slash hdr.Tar.Header.file_name in + let map = + if filename = "" then + map + else + let data_tar_offset = Int64.(div offset (of_int Tar.Header.length)) in + let v_or_d = + if is_dict filename then + Dict (hdr, StringMap.empty) + else + Value (hdr, data_tar_offset) + in + insert map (Mirage_kv.Key.v filename) v_or_d + in + let eof = Int64.(add offset + (add hdr.Tar.Header.file_size + (of_int (Tar.Header.compute_zero_padding_length hdr)))) + in + Lwt.return (Ok (eof, map)) in - let root = StringMap.empty in - loop ~global:None root >>= fun map -> - (* This is after the two [zero_block]s *) - let end_of_archive = in_channel.Reader.offset in - let map = Dict (Tar.Header.make "/" 0L, map) in - let write_lock = Lwt_mutex.create () in - Lwt.return ({ b; map; info; end_of_archive; write_lock }) + fold info b f (0L, StringMap.empty) >>= function + | Error `Fatal e -> + Format.kasprintf failwith "Fatal error reading archive: %a" Tar.pp_error e + | Error `Msg msg -> + Format.kasprintf failwith "Error reading archive: %s" msg + | Ok (end_of_archive, map) -> + let end_of_archive = Int64.(add end_of_archive (of_int (2 * Tar.Header.length))) in + let map = Dict (Tar.Header.make "/" 0L, map) in + let write_lock = Lwt_mutex.create () in + Lwt.return ({ b; map; info; end_of_archive; write_lock }) let disconnect _ = Lwt.return_unit @@ -292,7 +320,14 @@ module Make_KV_RW (CLOCK : Mirage_clock.PCLOCK) (BLOCK : Mirage_block.S) = struc include Make_KV_RO(BLOCK) - type write_error = [ `Block of BLOCK.error | `Block_write of BLOCK.write_error | Mirage_kv.write_error | `Entry_already_exists | `Path_segment_is_a_value | `Append_only | `Write_header of string ] + type write_error = [ + | `Block of BLOCK.error + | `Block_write of BLOCK.write_error + | Mirage_kv.write_error + | `Entry_already_exists + | `Path_segment_is_a_value + | `Append_only + | `Msg of string ] let pp_write_error ppf = function | `Block e -> Fmt.pf ppf "read error while writing: %a" BLOCK.pp_error e @@ -301,7 +336,7 @@ module Make_KV_RW (CLOCK : Mirage_clock.PCLOCK) (BLOCK : Mirage_block.S) = struc | `Entry_already_exists -> Fmt.string ppf "entry already exists" | `Path_segment_is_a_value -> Fmt.string ppf "path segment is a value" | `Append_only -> Fmt.string ppf "append only" - | `Write_header msg -> Fmt.pf ppf "writing tar header failed: %s" msg + | `Msg msg -> Fmt.pf ppf "writing tar header failed: %s" msg let write t sector_start buffers = Lwt_result.map_error (fun e -> `Block_write e) @@ -364,51 +399,31 @@ module Make_KV_RW (CLOCK : Mirage_clock.PCLOCK) (BLOCK : Mirage_block.S) = struc let map = remove map key in Dict (root, map) - module Writer = struct - type out_channel = { - b: BLOCK.t; - (** offset in bytes *) - mutable offset: int64; - info: Mirage_block.info; - } - type 'a io = 'a Lwt.t - exception Read of BLOCK.error - exception Write of BLOCK.write_error - let really_write out_channel str = - assert (String.length str <= Tar.Header.length); - let data = - let cs = Cstruct.create Tar.Header.length in - Cstruct.blit_from_string str 0 cs 0 (String.length str); - cs - in - let sector_size = out_channel.info.sector_size in - let sector = Int64.(div out_channel.offset (of_int sector_size)) in - let block = Cstruct.create sector_size in - BLOCK.read out_channel.b sector [ block ] >>= function - | Error e -> raise (Read e) - | Ok () -> - let start_offset = Int64.to_int out_channel.offset mod sector_size in - Cstruct.blit data 0 block start_offset (Cstruct.length data); - BLOCK.write out_channel.b sector [ block ] >>= function - | Error e -> raise (Write e) - | Ok () -> - Lwt.return_unit - end - module HW = Tar.HeaderWriter(Lwt)(Writer) + let write_data info b offset buffer = + assert (String.length buffer <= Tar.Header.length); + let sector_size = info.Mirage_block.sector_size in + let sector = Int64.(div offset (of_int sector_size)) in + let block = Cstruct.create sector_size in + BLOCK.read b sector [ block ] >>= function + | Error e -> Lwt.return (Error (`Block e)) + | Ok () -> + let start_offset = Int64.to_int offset mod sector_size in + Cstruct.blit_from_string buffer 0 block start_offset (String.length buffer); + BLOCK.write b sector [ block ] >>= function + | Error e -> Lwt.return (Error (`Block_write e)) + | Ok () -> Lwt.return (Ok ()) let write_header (t : t) header_start_bytes hdr = - let hw = Writer.{ b = t.b ; offset = header_start_bytes ; info = t.info } in (* it is important we write at level [Ustar] at most as we assume the header(s) taking up exactly 512 bytes. With [GNU] level extra blocks may be used for long names. *) - Lwt.catch - (fun () -> HW.write ~level:Tar.Header.Ustar hdr hw >|= function - | Ok () -> Ok () - | Error `Msg msg -> Error (`Write_header msg)) - (function - | Writer.Read e -> Lwt.return (Error (`Block e)) - | Writer.Write e -> Lwt.return (Error (`Block_write e)) - | exn -> raise exn) + let open Lwt_result.Infix in + Lwt_result.lift (Tar.encode_header ~level:Tar.Header.Ustar hdr) >>= fun datas -> + Lwt_list.fold_left_s (fun acc buf -> + Lwt_result.lift acc >>= fun off' -> + write_data t.info t.b off' buf >|= fun () -> + Int64.(add off' (of_int (String.length buf)))) + (Ok header_start_bytes) datas let set t key data = Lwt_mutex.with_lock t.write_lock (fun () -> @@ -486,7 +501,7 @@ module Make_KV_RW (CLOCK : Mirage_clock.PCLOCK) (BLOCK : Mirage_block.S) = struc in write t (succ data_start_sector) remaining_sectors >>>= fun () -> (* finally write header and first block *) - write_header t header_start_bytes hdr >>>= fun () -> + write_header t header_start_bytes hdr >>>= fun _new_offset -> (* read in slack at beginning which could include the header *) read_partial_sector t data_start_sector first_sector ~offset:0L ~length:data_start_sector_offset >>>= fun () -> @@ -555,7 +570,7 @@ module Make_KV_RW (CLOCK : Mirage_clock.PCLOCK) (BLOCK : Mirage_block.S) = struc | Error _ as e -> e end >>>= fun (hdr, data_offset) -> let hdr = { hdr with Tar.Header.file_name = Mirage_kv.Key.to_string dest } in - write_header t Int64.(sub (mul data_offset (of_int Tar.Header.length)) (of_int Tar.Header.length)) hdr >>>= fun () -> + write_header t Int64.(sub (mul data_offset (of_int Tar.Header.length)) (of_int Tar.Header.length)) hdr >>>= fun _new_off -> t.map <- update_insert t.map dest hdr data_offset; t.map <- update_remove t.map source; Lwt_result.return ()) @@ -680,7 +695,7 @@ module Make_KV_RW (CLOCK : Mirage_clock.PCLOCK) (BLOCK : Mirage_block.S) = struc ~length:(sub sector_size last_sector_offset) end >>>= fun () -> write t to_zero_start_sector (Array.to_list data) >>>= fun () -> - write_header t header_start_bytes hdr >>>= fun () -> + write_header t header_start_bytes hdr >>>= fun _new_offset -> let tar_offset = div (sub t.end_of_archive (of_int Tar.Header.length)) (of_int Tar.Header.length) in t.end_of_archive <- end_bytes; t.map <- update_insert t.map key hdr tar_offset; diff --git a/unix/tar_unix.mli b/unix/tar_unix.mli index b8247b0..283795f 100644 --- a/unix/tar_unix.mli +++ b/unix/tar_unix.mli @@ -74,4 +74,4 @@ val write_global_extended_header : ?level:Tar.Header.compatibility -> (unit, [ `Msg of string | `Unix of (Unix.error * string * string) ]) result (** [write_end fd] writes the tar end marker to [fd]. *) -val write_end : Unix.file_descr -> (unit, [ `Msg of string ]) result +val write_end : Unix.file_descr -> (unit, [> `Msg of string ]) result From 60d6faa95767416161777acc712a9f691068fa8a Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Sun, 4 Feb 2024 13:22:28 +0100 Subject: [PATCH 12/34] revive transform test --- lib/tar_gz.ml | 3 +-- lib/tar_gz.mli | 3 +-- lib_test/parse_test.ml | 42 ++++++++++++++++++++++++++++-------------- 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/lib/tar_gz.ml b/lib/tar_gz.ml index c64e20c..e197482 100644 --- a/lib/tar_gz.ml +++ b/lib/tar_gz.ml @@ -1,4 +1,4 @@ -(*(* +(* * Copyright (C) 2022 Romain Calascibetta * * Permission to use, copy, modify, and distribute this software for any @@ -230,4 +230,3 @@ module Make | `End _gz -> Async.return () in until_end (Gz.Def.src state.gz De.bigstring_empty 0 0) end -*) diff --git a/lib/tar_gz.mli b/lib/tar_gz.mli index 3626a08..de18b76 100644 --- a/lib/tar_gz.mli +++ b/lib/tar_gz.mli @@ -1,4 +1,4 @@ -(*(* +(* * Copyright (C) 2022 Romain Calascibetta * * Permission to use, copy, modify, and distribute this software for any @@ -72,4 +72,3 @@ module Make module HeaderWriter : Tar.HEADERWRITER with type out_channel = out_channel and type 'a io = 'a Async.t end - *) diff --git a/lib_test/parse_test.ml b/lib_test/parse_test.ml index cd17f57..912038b 100644 --- a/lib_test/parse_test.ml +++ b/lib_test/parse_test.ml @@ -275,25 +275,39 @@ let starts_with ~prefix s = in len_s >= len_pre && aux 0 let can_transform_tar () = - (* let level = Tar.Header.Ustar in with_tar ~level () @@ fun tar_in _file_list -> - let fd_in = Unix.openfile tar_in [ O_RDONLY; O_CLOEXEC ] 0 in let tar_out = Filename.temp_file "tar-transformed" ".tar" in let fd_out = Unix.openfile tar_out [ O_WRONLY; O_CREAT; O_CLOEXEC ] 0o644 in with_tmpdir @@ fun temp_dir -> - Tar_unix.Archive.transform ~level (fun hdr -> - {hdr with Tar.Header.file_name = Filename.concat temp_dir hdr.file_name}) - fd_in fd_out; - Unix.close fd_in; - Unix.close fd_out; - let fd_in = Unix.openfile tar_out [ O_RDONLY; O_CLOEXEC ] 0 in - Tar_unix.Archive.with_next_file fd_in ~global:None (fun fd_file _global hdr -> - Alcotest.(check string) "Filename was transformed" temp_dir - (String.sub hdr.file_name 0 (min (String.length hdr.file_name) (String.length temp_dir))); - Tar_unix.skip fd_file (Int64.to_int hdr.file_size)); - Unix.close fd_in - *) () + let f fd ?global:_ hdr _ = + ignore Unix.(lseek fd (Int64.to_int hdr.Tar.Header.file_size) SEEK_CUR); + let hdr = + { hdr with + Tar.Header.file_name = Filename.concat temp_dir hdr.file_name; + file_size = 0L + } + in + match Tar_unix.write_header ~level hdr fd_out with + | Ok () -> Ok () + | Error _ -> Alcotest.fail "error writing header" + in + match Tar_unix.fold f tar_in () with + | Error e -> Alcotest.failf "error folding %a" Tar_unix.pp_decode_error e + | Ok () -> + match Tar_unix.write_end fd_out with + | Error _ -> Alcotest.fail "couldn't write end" + | Ok () -> + Unix.close fd_out; + let f fd ?global:_ hdr _ = + ignore Unix.(lseek fd (Int64.to_int hdr.Tar.Header.file_size) SEEK_CUR); + Alcotest.(check string) "Filename was transformed" temp_dir + (String.sub hdr.file_name 0 (min (String.length hdr.file_name) (String.length temp_dir))); + Ok () + in + match Tar_unix.fold f tar_out () with + | Error e -> Alcotest.failf "error folding2 %a" Tar_unix.pp_decode_error e + | Ok () -> () module Block4096 = struct include Block From 462063b5ee6a770c87bbe198ef8bc01c28e07a76 Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Sun, 4 Feb 2024 13:28:20 +0100 Subject: [PATCH 13/34] test tar_unix, use fold for list --- lib_test/parse_test.ml | 130 ++++++++++------------------------------- 1 file changed, 30 insertions(+), 100 deletions(-) diff --git a/lib_test/parse_test.ml b/lib_test/parse_test.ml index 912038b..03d0bd9 100644 --- a/lib_test/parse_test.ml +++ b/lib_test/parse_test.ml @@ -31,70 +31,15 @@ module Unix = struct if Sys.win32 then truncate (convert_path `Windows path) else truncate path end -let rec with_restart op fd buf off len = - try op fd buf off len with - Unix.Unix_error (Unix.EINTR,_,_) -> - with_restart op fd buf off len - -let really_read fd buf = - let len = Bytes.length buf in - let rec loop offset = - if offset < len then - let n = with_restart Unix.read fd buf offset (len - offset) in - if n = 0 then raise End_of_file; - loop (offset + n) - in - loop 0 - -let run_reader fd = - let rec loop ?b t acc = - let data = match b with - | None -> - let b = Bytes.create Tar.Header.length in - really_read fd b; - Bytes.unsafe_to_string b - | Some s -> s - in - match Tar.decode t data with - | Ok (t, Some `Header hdr, _global) -> - print_endline hdr.Tar.Header.file_name; - ignore (Unix.lseek fd - (Int64.to_int hdr.Tar.Header.file_size + Tar.Header.compute_zero_padding_length hdr) - Unix.SEEK_CUR); - loop t (hdr :: acc) - | Ok (t, Some `Skip n, _global) -> - ignore (Unix.lseek fd n Unix.SEEK_CUR); - loop t acc - | Ok (t, Some `Read n, _global) -> - let b = Bytes.create n in - really_read fd b; - let b = Bytes.unsafe_to_string b in - loop ~b t acc - | Ok (t, None, _global) -> - loop t acc - | Error `Eof -> List.rev acc - | Error `Fatal e -> Alcotest.failf "unexpected error: %a" Tar.pp_error e - in - let t = Tar.decode_state () in - let r = loop t [] in - List.iter (fun h -> print_endline h.Tar.Header.file_name) r; - r - -let list fd = run_reader fd -(* let rec loop global acc = - match Tar_unix.HeaderReader.read ~global fd with - | Ok (hdr, global) -> - print_endline hdr.Tar.Header.file_name; - Tar_unix.skip fd - (Int64.to_int hdr.Tar.Header.file_size + Tar.Header.compute_zero_padding_length hdr); - loop global (hdr :: acc) - | Error `Eof -> - List.rev acc - | Error `Fatal e -> Alcotest.failf "unexpected error: %a" Tar.pp_error e +let list filename = + let f fd ?global:_ hdr acc = + print_endline hdr.Tar.Header.file_name; + ignore Unix.(lseek fd (Int64.to_int hdr.Tar.Header.file_size) SEEK_CUR); + Ok (hdr :: acc) in - let r = loop None [] in - List.iter (fun h -> print_endline h.Tar.Header.file_name) r; - r*) + match Tar_unix.fold f filename [] with + | Ok acc -> List.rev acc + | Error e -> Alcotest.failf "unexpected error: %a" Tar_unix.pp_decode_error e let pp_header f x = Fmt.pf f "%s" (Tar.Header.to_detailed_string x) let header = Alcotest.testable pp_header ( = ) @@ -153,10 +98,8 @@ let with_tar ?(level:Tar.Header.compatibility option) ?files ?(sector_size = 512 let can_read_tar () = with_tar () @@ fun tar_filename files -> - let fd = Unix.openfile tar_filename [ O_RDONLY; O_CLOEXEC ] 0 in - let files' = List.map (fun t -> t.Tar.Header.file_name) (list fd) in + let files' = List.map (fun t -> t.Tar.Header.file_name) (list tar_filename) in flush stdout; - Unix.close fd; let missing = set_difference files files' in let missing' = set_difference files' files in Alcotest.(check (list string)) "missing" [] missing; @@ -184,44 +127,31 @@ let can_write_pax () = (Unix.error_message e) f a ) ~finally:(fun () -> Unix.close fd); (* Read it back and verify the header was read *) - let fd = Unix.openfile filename [ O_RDONLY; O_CLOEXEC ] 0 in - Fun.protect - (fun () -> - match list fd with - | [ one ] -> Alcotest.(check int) "user_id" user_id one.Tar.Header.user_id - | xs -> Alcotest.failf "Headers = %a" (Fmt.list pp_header) xs - ) ~finally:(fun () -> Unix.close fd) - + match list filename with + | [ one ] -> Alcotest.(check int) "user_id" user_id one.Tar.Header.user_id + | xs -> Alcotest.failf "Headers = %a" (Fmt.list pp_header) xs let can_list_longlink_tar () = - let fd = Unix.openfile "lib_test/long.tar" [ O_RDONLY; O_CLOEXEC ] 0o0 in - Fun.protect - (fun () -> - let all = list fd in - let filenames = List.map (fun h -> h.Tar.Header.file_name) all in - (* List.iteri (fun i x -> Printf.fprintf stderr "%d: %s\n%!" i x) filenames; *) - let expected = [ - "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789/"; - "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789/BCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789/"; - "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789/BCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789/CDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.txt"; - ] in - Alcotest.(check (list string)) "respects filenames" expected filenames - ) ~finally:(fun () -> Unix.close fd) + let all = list "lib_test/long.tar" in + let filenames = List.map (fun h -> h.Tar.Header.file_name) all in + (* List.iteri (fun i x -> Printf.fprintf stderr "%d: %s\n%!" i x) filenames; *) + let expected = [ + "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789/"; + "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789/BCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789/"; + "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789/BCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789/CDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.txt"; + ] in + Alcotest.(check (list string)) "respects filenames" expected filenames let can_list_long_pax_tar () = - let fd = Unix.openfile "lib_test/long-pax.tar" [ O_RDONLY; O_CLOEXEC ] 0x0 in - Fun.protect - (fun () -> - let all = list fd in - let filenames = List.map (fun h -> h.Tar.Header.file_name) all in - (* List.iteri (fun i x -> Printf.fprintf stderr "%d: %s\n%!" i x) filenames; *) - let expected = [ - "t/"; - "t/someveryveryverylonggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggname"; - "t/someveryveryverylonggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggglink"; - ] in - Alcotest.(check (list string)) "respects filenames" expected filenames - ) ~finally:(fun () -> Unix.close fd) + let all = list "lib_test/long-pax.tar" in + let filenames = List.map (fun h -> h.Tar.Header.file_name) all in + (* List.iteri (fun i x -> Printf.fprintf stderr "%d: %s\n%!" i x) filenames; *) + let expected = [ + "t/"; + "t/someveryveryverylonggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggname"; + "t/someveryveryverylonggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggglink"; + ] in + Alcotest.(check (list string)) "respects filenames" expected filenames (* "pax-shenanigans.tar" is an archive with a regular file "placeholder" with a pax header "path=clearly/a/directory/". The resulting header has normal link From 2b49b1f5d07e700c30889a71c81f5557de3e070f Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Sun, 4 Feb 2024 13:31:21 +0100 Subject: [PATCH 14/34] document write_header --- unix/tar_lwt_unix.mli | 1 + unix/tar_unix.mli | 1 + 2 files changed, 2 insertions(+) diff --git a/unix/tar_lwt_unix.mli b/unix/tar_lwt_unix.mli index cc8ae47..1282b38 100644 --- a/unix/tar_lwt_unix.mli +++ b/unix/tar_lwt_unix.mli @@ -63,6 +63,7 @@ val append_file : ?level:Tar.Header.compatibility -> ?header:Tar.Header.t -> string -> Lwt_unix.file_descr -> (unit, [ `Msg of string | `Unix of (Unix.error * string * string) ]) result Lwt.t +(** [write_header ~level hdr fd] writes the header [hdr] to [fd]. *) val write_header : ?level:Tar.Header.compatibility -> Tar.Header.t -> Lwt_unix.file_descr -> (unit, [ `Msg of string | `Unix of (Unix.error * string * string) ]) result Lwt.t diff --git a/unix/tar_unix.mli b/unix/tar_unix.mli index 283795f..3863ffd 100644 --- a/unix/tar_unix.mli +++ b/unix/tar_unix.mli @@ -63,6 +63,7 @@ val append_file : ?level:Tar.Header.compatibility -> ?header:Tar.Header.t -> string -> Unix.file_descr -> (unit, [ `Msg of string | `Unix of (Unix.error * string * string) ]) result +(** [write_header ~level hdr fd] writes the header [hdr] to [fd]. *) val write_header : ?level:Tar.Header.compatibility -> Tar.Header.t -> Unix.file_descr -> (unit, [ `Msg of string | `Unix of (Unix.error * string * string) ]) result From 2388f624f14cc78eea9240cda2122300ad673032 Mon Sep 17 00:00:00 2001 From: Calascibetta Romain Date: Wed, 7 Feb 2024 12:08:03 +0100 Subject: [PATCH 15/34] Purify fold and move it into Tar with a GADT, use it then for Tar_gz which will produce an other GADT value and Tar_{,lwt_}unix which evaluate our GADT --- lib/tar.ml | 41 +++++++++++ lib/tar.mli | 25 ++++++- lib/tar_gz.ml | 161 ++++++++++++++++++++++-------------------- lib/tar_gz.mli | 8 +++ unix/tar_lwt_unix.ml | 70 +++++++++--------- unix/tar_lwt_unix.mli | 6 +- unix/tar_unix.ml | 47 ++++-------- unix/tar_unix.mli | 2 +- 8 files changed, 212 insertions(+), 148 deletions(-) diff --git a/lib/tar.ml b/lib/tar.ml index 7fe8709..2666112 100644 --- a/lib/tar.ml +++ b/lib/tar.ml @@ -815,3 +815,44 @@ let encode_header ?level header = let encode_global_extended_header ?level global = encode_extended_header ?level `Global global + +type ('a, 'err) t = + | Really_read : int -> (string, 'err) t + | Read : int -> (string, 'err) t + | Seek : int -> (int, 'err) t + | Bind : ('a, 'err) t * ('a -> ('b, 'err) t) -> ('b, 'err) t + | Return : ('a, 'err) result -> ('a, 'err) t + +let ( let* ) x f = Bind (x, f) +let return x = Return x +let really_read n = Really_read n +let read n = Read n +let seek n = Seek n + +type ('a, 'err) fold = (?global:Header.Extended.t -> Header.t -> 'a -> ('a, 'err) result) -> 'a -> ('a, 'err) t + +let fold f init = + let rec go t ?global ?data acc = + let* data = match data with + | None -> really_read Header.length + | Some data -> return (Ok data) in + match decode t data with + | Ok (t, Some `Header hdr, g) -> + let global = Option.fold ~none:global ~some:(fun g -> Some g) g in + let* acc' = return (f ?global hdr acc) in + let* _off = seek (Header.compute_zero_padding_length hdr) in + go t ?global acc' + | Ok (t, Some `Skip n, g) -> + let global = Option.fold ~none:global ~some:(fun g -> Some g) g in + let* _off = seek n in + go t ?global acc + | Ok (t, Some `Read n, g) -> + let global = Option.fold ~none:global ~some:(fun g -> Some g) g in + let* data = really_read n in + go t ?global ~data acc + | Ok (t, None, g) -> + let global = Option.fold ~none:global ~some:(fun g -> Some g) g in + go t ?global acc + | Error `Eof -> return (Ok acc) + | Error `Fatal _ as e -> return e in + go (decode_state ()) init diff --git a/lib/tar.mli b/lib/tar.mli index f0a24de..e6b5769 100644 --- a/lib/tar.mli +++ b/lib/tar.mli @@ -19,7 +19,7 @@ {e %%VERSION%% - {{:%%PKG_HOMEPAGE%% }homepage}} *) (** The type of errors that may occur. *) -type error = [`Checksum_mismatch | `Corrupt_pax_header | `Zero_block | `Unmarshal of string] +type error = [ `Checksum_mismatch | `Corrupt_pax_header | `Zero_block | `Unmarshal of string ] (** [pp_error ppf e] pretty prints the error [e] on the formatter [ppf]. *) val pp_error : Format.formatter -> [< error] -> unit @@ -123,7 +123,7 @@ module Header : sig (** Unmarshal a header block, returning [None] if it's all zeroes. This header block may be preceded by an [?extended] block which will override some fields. *) - val unmarshal : ?extended:Extended.t -> string -> (t, [`Zero_block | `Checksum_mismatch | `Unmarshal of string]) result + val unmarshal : ?extended:Extended.t -> string -> (t, [> `Zero_block | `Checksum_mismatch | `Unmarshal of string]) result (** Marshal a header block, computing and inserting the checksum. *) val marshal : ?level:compatibility -> bytes -> t -> (unit, [> `Msg of string ]) result @@ -157,7 +157,7 @@ val decode_state : ?global:Header.Extended.t -> unit -> decode_state further decoding until [`Eof] (or an error) occurs. *) val decode : decode_state -> string -> (decode_state * [ `Read of int | `Skip of int | `Header of Header.t ] option * Header.Extended.t option, - [ `Eof | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] ]) + [ `Eof | `Fatal of error ]) result (** [encode_header ~level hdr] encodes the header with the provided [level] @@ -170,3 +170,22 @@ val encode_header : ?level:Header.compatibility -> (** [encode_global_extended_header hdr] encodes the global extended header as a list of strings. *) val encode_global_extended_header : ?level:Header.compatibility -> Header.Extended.t -> (string list, [> `Msg of string ]) result + +(** {1 Pure implementation of [fold].} *) + +type ('a, 'err) t = + | Really_read : int -> (string, 'err) t + | Read : int -> (string, 'err) t + | Seek : int -> (int, 'err) t + | Bind : ('a, 'err) t * ('a -> ('b, 'err) t) -> ('b, 'err) t + | Return : ('a, 'err) result -> ('a, 'err) t + +val really_read : int -> (string, _) t +val read : int -> (string, _) t +val seek : int -> (int, _) t +val ( let* ) : ('a, 'err) t -> ('a -> ('b, 'err) t) -> ('b, 'err) t +val return : ('a, 'err) result -> ('a, 'err) t + +type ('a, 'err) fold = (?global:Header.Extended.t -> Header.t -> 'a -> ('a, 'err) result) -> 'a -> ('a, 'err) t + +val fold : ('a, [> `Fatal of error ]) fold diff --git a/lib/tar_gz.ml b/lib/tar_gz.ml index e197482..9bac895 100644 --- a/lib/tar_gz.ml +++ b/lib/tar_gz.ml @@ -14,15 +14,10 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. *) -module type READER = sig - type in_channel - type 'a io - val read : in_channel -> bytes -> int io -end - external ba_get_int32_ne : De.bigstring -> int -> int32 = "%caml_bigstring_get32" external ba_set_int32_ne : De.bigstring -> int -> int32 -> unit = "%caml_bigstring_set32" +(* let bigstring_to_string ?(off= 0) ?len ba = let len = match len with | Some len -> len @@ -41,6 +36,7 @@ let bigstring_to_string ?(off= 0) ?len ba = Bytes.set res i v done; Bytes.unsafe_to_string res +*) let bigstring_blit_string src ~src_off dst ~dst_off ~len = let len0 = len land 3 in @@ -71,6 +67,89 @@ let bigstring_blit_bytes src ~src_off dst ~dst_off ~len = Bytes.set dst (dst_off + i) v done +type decoder = + { mutable gz : Gz.Inf.decoder + ; ic_buffer : De.bigstring + ; oc_buffer : De.bigstring + ; tp_length : int + ; mutable pos : int } + +let really_read_through_gz + : decoder -> bytes -> (unit, 'err) Tar.t + = fun ({ ic_buffer; oc_buffer; tp_length; _ } as state) res -> + let open Tar in + let rec until_full_or_end gz (res, res_off, res_len) = + match Gz.Inf.decode gz with + | `Flush gz -> + let max = De.bigstring_length oc_buffer - Gz.Inf.dst_rem gz in + let len = min res_len max in + bigstring_blit_bytes oc_buffer ~src_off:0 res ~dst_off:res_off ~len; + if len < max + then ( state.pos <- len + ; state.gz <- gz + ; return (Ok ()) ) + else until_full_or_end (Gz.Inf.flush gz) (res, res_off + len, res_len - len) + | `End gz -> + let max = De.bigstring_length oc_buffer - Gz.Inf.dst_rem gz in + let len = min res_len max in + bigstring_blit_bytes oc_buffer ~src_off:0 res ~dst_off:res_off ~len; + if res_len > len + then return (Error `Eof) + else ( state.pos <- len + ; state.gz <- gz + ; return (Ok ()) ) + | `Await gz -> + let* tp_buffer = Tar.read tp_length in + let len = String.length tp_buffer in + bigstring_blit_string tp_buffer ~src_off:0 ic_buffer ~dst_off:0 ~len; + let gz = Gz.Inf.src gz ic_buffer 0 len in + until_full_or_end gz (res, res_off, res_len) + | `Malformed err -> return (Error (`Gz err)) in + let max = (De.bigstring_length oc_buffer - Gz.Inf.dst_rem state.gz) - state.pos in + let len = min (Bytes.length res) max in + bigstring_blit_bytes oc_buffer ~src_off:state.pos res ~dst_off:0 ~len; + if len < max + then ( state.pos <- state.pos + len + ; return (Ok ()) ) + else until_full_or_end (Gz.Inf.flush state.gz) (res, len, Bytes.length res - len) + +let really_read_through_gz decoder len = + let open Tar in + let res = Bytes.create len in + let* () = really_read_through_gz decoder res in + Tar.return (Ok (Bytes.unsafe_to_string res)) + +type error = [ `Fatal of Tar.error | `Eof | `Gz of string ] + +let seek_through_gz : decoder -> int -> (int, [> error ]) Tar.t = fun state len -> + let open Tar in + let* _buf = really_read_through_gz state len in + Tar.return (Ok 0 (* XXX(dinosaure): actually, [fold] ignores the result. *)) + +type 'err run = { run : 'a 'err. ('a, 'err) Tar.t -> ('a, 'err) result } [@@unboxed] + +let fold_with_gz + : run:[> error ] run -> _ -> _ -> _ + = fun ~run:{ run } f init -> + let rec go : type a. decoder -> (a, [> error ] as 'err) Tar.t -> (a, 'err) Tar.t = fun decoder -> function + | Tar.Really_read len -> really_read_through_gz decoder len + | Tar.Read _len -> assert false (* XXX(dinosaure): actually does not emit [Tar.Read]. *) + | Tar.Seek len -> seek_through_gz decoder len + | Tar.Return v -> Tar.return v + | Tar.Bind (x, f) -> + match run x with + | Ok value -> go decoder (f value) + | Error _ as err -> Tar.return err in + let decoder = + let oc_buffer = De.bigstring_create 0x1000 in + { gz= Gz.Inf.decoder `Manual ~o:oc_buffer + ; oc_buffer + ; ic_buffer= De.bigstring_create 0x1000 + ; tp_length= 0x1000 + ; pos= 0 } in + go decoder (Tar.fold f init) + +(* module Make (Async : Tar.ASYNC) (Writer : Tar.WRITER with type 'a io = 'a Async.t) @@ -108,75 +187,6 @@ module Make go gz (str, 0, String.length str) end - module Gz_reader = struct - type in_channel = - { mutable gz : Gz.Inf.decoder - ; ic_buffer : De.bigstring - ; oc_buffer : De.bigstring - ; tp_buffer : bytes - ; in_channel : Reader.in_channel - ; mutable pos : int } - - type 'a io = 'a Async.t - - let really_read - : in_channel -> bytes -> unit io - = fun ({ ic_buffer; oc_buffer; in_channel; tp_buffer; _ } as state) res -> - let rec until_full_or_end gz (res, res_off, res_len) = - match Gz.Inf.decode gz with - | `Flush gz -> - let max = De.bigstring_length oc_buffer - Gz.Inf.dst_rem gz in - let len = min res_len max in - bigstring_blit_bytes oc_buffer ~src_off:0 res ~dst_off:res_off ~len; - if len < max - then ( state.pos <- len - ; state.gz <- gz - ; Async.return () ) - else until_full_or_end (Gz.Inf.flush gz) (res, res_off + len, res_len - len) - | `End gz -> - let max = De.bigstring_length oc_buffer - Gz.Inf.dst_rem gz in - let len = min res_len max in - bigstring_blit_bytes oc_buffer ~src_off:0 res ~dst_off:res_off ~len; - if res_len > len - then raise End_of_file - else ( state.pos <- len - ; state.gz <- gz - ; Async.return () ) - | `Await gz -> - Reader.read in_channel tp_buffer >>= fun len -> - bigstring_blit_string (Bytes.unsafe_to_string tp_buffer) ~src_off:0 ic_buffer ~dst_off:0 ~len; - let gz = Gz.Inf.src gz ic_buffer 0 len in - until_full_or_end gz (res, res_off, res_len) - | `Malformed err -> failwith ("gzip: " ^ err) in - let max = (De.bigstring_length oc_buffer - Gz.Inf.dst_rem state.gz) - state.pos in - let len = min (Bytes.length res) max in - bigstring_blit_bytes oc_buffer ~src_off:state.pos res ~dst_off:0 ~len; - if len < max - then ( state.pos <- state.pos + len - ; Async.return () ) - else until_full_or_end (Gz.Inf.flush state.gz) (res, len, Bytes.length res - len) - - let skip : in_channel -> int -> unit io = fun state len -> - let res = Bytes.create len in - really_read state res - end - - module HeaderWriter = Tar.HeaderWriter (Async) (Gz_writer) - module HeaderReader = Tar.HeaderReader (Async) (Gz_reader) - - type in_channel = Gz_reader.in_channel - - let of_in_channel ~internal:oc_buffer in_channel = - { Gz_reader.gz= Gz.Inf.decoder `Manual ~o:oc_buffer - ; oc_buffer - ; ic_buffer= De.bigstring_create 0x1000 - ; tp_buffer= Bytes.create 0x1000 - ; in_channel - ; pos= 0 } - - let really_read = Gz_reader.really_read - let skip = Gz_reader.skip - type out_channel = Gz_writer.out_channel let of_out_channel ?bits:(w_bits= 15) ?q:(q_len= 0x1000) ~level ~mtime os out_channel = @@ -230,3 +240,4 @@ module Make | `End _gz -> Async.return () in until_end (Gz.Def.src state.gz De.bigstring_empty 0 0) end +*) diff --git a/lib/tar_gz.mli b/lib/tar_gz.mli index de18b76..a6e52c5 100644 --- a/lib/tar_gz.mli +++ b/lib/tar_gz.mli @@ -14,6 +14,13 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. *) +type error = [ `Fatal of Tar.error | `Eof | `Gz of string ] + +type 'err run = { run : 'a 'err. ('a, 'err) Tar.t -> ('a, 'err) result } [@@unboxed] + +val fold_with_gz : run:[> error ] run -> ('a, [> error]) Tar.fold + +(* module type READER = sig type in_channel type 'a io @@ -72,3 +79,4 @@ module Make module HeaderWriter : Tar.HEADERWRITER with type out_channel = out_channel and type 'a io = 'a Async.t end +*) diff --git a/unix/tar_lwt_unix.ml b/unix/tar_lwt_unix.ml index c0ff4a2..d80227e 100644 --- a/unix/tar_lwt_unix.ml +++ b/unix/tar_lwt_unix.ml @@ -16,7 +16,7 @@ *) type decode_error = [ - | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] + | `Fatal of Tar.error | `Unix of Unix.error * string * string | `Unexpected_end_of_file | `Msg of string @@ -63,38 +63,18 @@ let safe_close fd = let fold f filename init = let open Lwt_result.Infix in safe Lwt_unix.(openfile filename [ O_RDONLY ]) 0 >>= fun fd -> - let rec go t fd ?global ?data acc = - (match data with - | None -> - let buf = Bytes.make Tar.Header.length '\000' in - read_complete fd buf Tar.Header.length >|= fun () -> - Bytes.unsafe_to_string buf - | Some data -> - Lwt.return (Ok data)) >>= fun data -> - match Tar.decode t data with - | Ok (t, Some `Header hdr, g) -> - let global = Option.fold ~none:global ~some:(fun g -> Some g) g in - f fd ?global hdr acc >>= fun acc' -> - seek fd (Tar.Header.compute_zero_padding_length hdr) >>= fun _off -> - go t fd ?global acc' - | Ok (t, Some `Skip n, g) -> - let global = Option.fold ~none:global ~some:(fun g -> Some g) g in - seek fd n >>= fun _off -> - go t fd ?global acc - | Ok (t, Some `Read n, g) -> - let global = Option.fold ~none:global ~some:(fun g -> Some g) g in - let buf = Bytes.make n '\000' in - read_complete fd buf n >>= fun () -> - let data = Bytes.unsafe_to_string buf in - go t fd ?global ~data acc - | Ok (t, None, g) -> - let global = Option.fold ~none:global ~some:(fun g -> Some g) g in - go t fd ?global acc - | Error `Eof -> Lwt.return (Ok acc) - | Error `Fatal _ as e -> Lwt.return e - in + let rec run : type a. (a, [> decode_error ] as 'err) Tar.t -> (a, 'err) result Lwt.t = function + | Tar.Read _ -> assert false (* XXX(dinosaure): [Tar.fold] does not emit [Tar.Read]. *) + | Tar.Really_read len -> + let buf = Bytes.make len '\000' in + read_complete fd buf Tar.Header.length >|= fun () -> + Bytes.unsafe_to_string buf + | Tar.Seek len -> seek fd len + | Tar.Return value -> Lwt.return value + | Tar.Bind (x, f) -> + run x >>= fun value -> run (f value) in Lwt.finalize - (fun () -> go (Tar.decode_state ()) fd init) + (fun () -> run (Tar.fold (f fd) init)) (fun () -> safe_close fd) let unix_err_to_msg = function @@ -125,7 +105,7 @@ let copy ~src_fd ~dst_fd len = let extract ?(filter = fun _ -> true) ~src dst = let open Lwt_result.Infix in - let f fd ?global:_ hdr () = + let f fd hdr = if filter hdr then match hdr.Tar.Header.link_indicator with | Tar.Header.Link.Normal -> @@ -146,7 +126,29 @@ let extract ?(filter = fun _ -> true) ~src dst = (seek fd (Int64.to_int hdr.Tar.Header.file_size)) >|= fun _off -> () in - fold f src () + (* XXX(dinosaure): the lwt logic to ignore the ['a Lwt.t]. *) + let open Lwt.Infix in + let queue = Queue.create () in + let f fd ?global:_ hdr () = + let th = f fd hdr in + Queue.add th queue; + Ok () in + let stop, do_stop = Lwt.task () in + let consume queue = match Queue.take_opt queue with + | Some th -> Lwt.return (`Thread th) + | None -> Lwt.return `Yield in + let rec join () = + Lwt.pick [ stop; consume queue ] >>= function + | `Stop -> Lwt.return_unit + | `Thread th -> th >>= fun _ -> join () + (* TODO(dinosaure): we can do the yallop's trick and add a new kind of + value into [Tar.t] to express the ability to resolve an ['a Lwt.t] + value or we can as we do currently what we do here but we ignore all + errors from the user function. *) + | `Yield -> Lwt.pause () >>= join in + let fold () = fold f src () >|= fun acc -> + Lwt.wakeup do_stop `Stop; acc in + Lwt.both (fold ()) (join ()) >|= fun (acc, ()) -> acc (** Return the header needed for a particular file on disk *) let header_of_file ?level file = diff --git a/unix/tar_lwt_unix.mli b/unix/tar_lwt_unix.mli index 1282b38..7a177fb 100644 --- a/unix/tar_lwt_unix.mli +++ b/unix/tar_lwt_unix.mli @@ -17,7 +17,7 @@ (** Lwt_unix I/O for tar-formatted data *) type decode_error = [ - | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] + | `Fatal of Tar.error | `Unix of Unix.error * string * string | `Unexpected_end_of_file | `Msg of string @@ -30,8 +30,8 @@ val pp_decode_error : Format.formatter -> decode_error -> unit descriptor by [hdr.Tar.Header.file_size]. *) val fold : (Lwt_unix.file_descr -> ?global:Tar.Header.Extended.t -> Tar.Header.t -> 'a -> - ('a, decode_error) result Lwt.t) -> - string -> 'a -> ('a, decode_error) result Lwt.t + ('a, [> decode_error ] as 'err) result) -> + string -> 'a -> ('a, 'err) result Lwt.t (** [extract ~filter ~src dst] extracts the tar archive [src] into the directory [dst]. If [dst] does not exist, it is created. If [filter] is diff --git a/unix/tar_unix.ml b/unix/tar_unix.ml index b4c04db..6cfccdb 100644 --- a/unix/tar_unix.ml +++ b/unix/tar_unix.ml @@ -42,7 +42,7 @@ let seek fd n = safe (Unix.lseek fd n) Unix.SEEK_CUR type decode_error = [ - | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] + | `Fatal of Tar.error | `Unix of Unix.error * string * string | `Unexpected_end_of_file | `Msg of string @@ -60,39 +60,22 @@ let pp_decode_error ppf = function let fold f filename init = let* fd = safe Unix.(openfile filename [ O_RDONLY ]) 0 in - let rec go t fd ?global ?data acc = - let* data = match data with - | None -> - let buf = Bytes.make Tar.Header.length '\000' in - let* () = read_complete fd buf Tar.Header.length in - Ok (Bytes.unsafe_to_string buf) - | Some data -> Ok data - in - match Tar.decode t data with - | Ok (t, Some `Header hdr, g) -> - let global = Option.fold ~none:global ~some:(fun g -> Some g) g in - let* acc' = f fd ?global hdr acc in - let* _off = seek fd (Tar.Header.compute_zero_padding_length hdr) in - go t fd ?global acc' - | Ok (t, Some `Skip n, g) -> - let global = Option.fold ~none:global ~some:(fun g -> Some g) g in - let* _off = seek fd n in - go t fd ?global acc - | Ok (t, Some `Read n, g) -> - let global = Option.fold ~none:global ~some:(fun g -> Some g) g in - let buf = Bytes.make n '\000' in - let* () = read_complete fd buf n in - let data = Bytes.unsafe_to_string buf in - go t fd ?global ~data acc - | Ok (t, None, g) -> - let global = Option.fold ~none:global ~some:(fun g -> Some g) g in - go t fd ?global acc - | Error `Eof -> Ok acc - | Error `Fatal _ as e -> e - in + let rec run : type a. (a, [> decode_error ] as 'err) Tar.t -> (a, 'err) result = function + | Tar.Read _ -> assert false (* XXX(dinosaure): [Tar.fold] does not emit [Tar.Read]. *) + | Tar.Really_read len -> + let buf = Bytes.make len '\000' in + begin match read_complete fd buf len with + | Ok () -> Ok (Bytes.unsafe_to_string buf) + | Error _ as err -> err end + | Tar.Seek len -> seek fd len + | Tar.Return value -> value + | Tar.Bind (x, f) -> + match run x with + | Ok value -> run (f value) + | Error _ as err -> err in Fun.protect ~finally:(fun () -> safe_close fd) - (fun () -> go (Tar.decode_state ()) fd init) + (fun () -> run (Tar.fold (f fd) init)) let unix_err_to_msg = function | `Unix (e, f, s) -> diff --git a/unix/tar_unix.mli b/unix/tar_unix.mli index 3863ffd..11bf94c 100644 --- a/unix/tar_unix.mli +++ b/unix/tar_unix.mli @@ -17,7 +17,7 @@ (** Unix I/O for tar-formatted data. *) type decode_error = [ - | `Fatal of [ `Checksum_mismatch | `Corrupt_pax_header | `Unmarshal of string ] + | `Fatal of Tar.error | `Unix of Unix.error * string * string | `Unexpected_end_of_file | `Msg of string From 8b308a9a2f53a8680b6cbee67f588a28ead1d021 Mon Sep 17 00:00:00 2001 From: Calascibetta Romain Date: Wed, 7 Feb 2024 13:30:32 +0100 Subject: [PATCH 16/34] Keep the bind as is and Tar_gz does not require the run function (/cc @reynir) --- lib/tar_gz.ml | 11 ++--------- lib/tar_gz.mli | 4 +--- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/lib/tar_gz.ml b/lib/tar_gz.ml index 9bac895..bb102fe 100644 --- a/lib/tar_gz.ml +++ b/lib/tar_gz.ml @@ -126,20 +126,13 @@ let seek_through_gz : decoder -> int -> (int, [> error ]) Tar.t = fun state len let* _buf = really_read_through_gz state len in Tar.return (Ok 0 (* XXX(dinosaure): actually, [fold] ignores the result. *)) -type 'err run = { run : 'a 'err. ('a, 'err) Tar.t -> ('a, 'err) result } [@@unboxed] - -let fold_with_gz - : run:[> error ] run -> _ -> _ -> _ - = fun ~run:{ run } f init -> +let fold_with_gz f init = let rec go : type a. decoder -> (a, [> error ] as 'err) Tar.t -> (a, 'err) Tar.t = fun decoder -> function | Tar.Really_read len -> really_read_through_gz decoder len | Tar.Read _len -> assert false (* XXX(dinosaure): actually does not emit [Tar.Read]. *) | Tar.Seek len -> seek_through_gz decoder len | Tar.Return v -> Tar.return v - | Tar.Bind (x, f) -> - match run x with - | Ok value -> go decoder (f value) - | Error _ as err -> Tar.return err in + | Tar.Bind _ as bind -> bind in let decoder = let oc_buffer = De.bigstring_create 0x1000 in { gz= Gz.Inf.decoder `Manual ~o:oc_buffer diff --git a/lib/tar_gz.mli b/lib/tar_gz.mli index a6e52c5..984e874 100644 --- a/lib/tar_gz.mli +++ b/lib/tar_gz.mli @@ -16,9 +16,7 @@ type error = [ `Fatal of Tar.error | `Eof | `Gz of string ] -type 'err run = { run : 'a 'err. ('a, 'err) Tar.t -> ('a, 'err) result } [@@unboxed] - -val fold_with_gz : run:[> error ] run -> ('a, [> error]) Tar.fold +val fold_with_gz : ('a, [> error]) Tar.fold (* module type READER = sig From 14681fe1c8c07f020854044748832f785a49c55e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reynir=20Bj=C3=B6rnsson?= Date: Wed, 7 Feb 2024 14:21:11 +0100 Subject: [PATCH 17/34] Implement Tar_gz.gzipped : _ Tar.t -> _ Tar.t We can list .tar.gz archives that consists of directories and empty files \o/ files with content is not possible /o\ --- bin/dune | 2 +- bin/otar.ml | 66 ++++++++++++++++++++++---------------------- lib/tar_gz.ml | 10 ++++--- lib/tar_gz.mli | 2 +- unix/tar_lwt_unix.ml | 20 +++++++++++--- unix/tar_unix.ml | 27 +++++++++++++----- unix/tar_unix.mli | 2 ++ 7 files changed, 79 insertions(+), 50 deletions(-) diff --git a/bin/dune b/bin/dune index ad0a49e..eaf47cd 100644 --- a/bin/dune +++ b/bin/dune @@ -2,4 +2,4 @@ (name otar) (public_name otar) (package tar-unix) - (libraries unix tar.gz)) + (libraries unix tar.gz tar_unix)) diff --git a/bin/otar.ml b/bin/otar.ml index 75fedd5..d57ce57 100644 --- a/bin/otar.ml +++ b/bin/otar.ml @@ -13,23 +13,8 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. *) -(* -let () = Printexc.record_backtrace true - -module Tar_gz = Tar_gz.Make - (struct type 'a t = 'a - let ( >>= ) x f = f x - let return x = x end) - (struct type out_channel = Stdlib.out_channel - type 'a io = 'a - let really_write oc str = - output_string oc str end) - (struct type in_channel = Stdlib.in_channel - type 'a io = 'a - let read ic buf = - input ic buf 0 (Bytes.length buf) - end) +let () = Printexc.record_backtrace true let ( / ) = Filename.concat @@ -44,6 +29,7 @@ let stream_of_fd fd = let always x = fun _ -> x +(* let create_tarball directory oc = let files = Sys.readdir directory in let os = match Sys.os_type with @@ -85,6 +71,7 @@ let make directory oc = let oc = open_out filename in oc, (fun () -> close_out oc), Filename.extension filename = ".gz" in create_tarball directory oc ; oc_close () + *) let sizes = [| "B"; "KiB"; "MiB"; "GiB"; "TiB"; "PiB"; "EiB"; "ZiB"; "YiB" |] @@ -97,36 +84,49 @@ let bytes_to_size ?(decimals = 2) ppf = function Format.fprintf ppf "%.*f %s" decimals r sizes.(int_of_float i) let list filename = - let ic = open_in filename in - let ic = Tar_gz.of_in_channel ~internal:(De.bigstring_create 0x1000) ic in - let rec go global () = match Tar_gz.HeaderReader.read ~global ic with - | Ok (hdr, global) -> - Format.printf "%s (%s, %a)\n%!" - hdr.Tar.Header.file_name - (Tar.Header.Link.to_string hdr.link_indicator) - (bytes_to_size ~decimals:2) hdr.Tar.Header.file_size ; - (* Alternatively: - let padding = Tar.Header.compute_zero_padding_length hdr in - let data = Int64.to_int hdr.Tar.Header.file_size in - let to_skip = data + padding in *) - let to_skip = Tar.Header.(Int64.to_int (to_sectors hdr) * length) in - Tar_gz.skip ic to_skip ; - go global () + let go ?global:_ hdr () = + Format.printf "%s (%s, %a)\n%!" + hdr.Tar.Header.file_name + (Tar.Header.Link.to_string hdr.link_indicator) + (bytes_to_size ~decimals:2) hdr.Tar.Header.file_size ; + (* + (* Alternatively: + let padding = Tar.Header.compute_zero_padding_length hdr in + let data = Int64.to_int hdr.Tar.Header.file_size in + let to_skip = data + padding in *) + let to_skip = Tar.Header.(Int64.to_int (to_sectors hdr) * length) in + Tar_gz.skip ic to_skip ; + go global () | Error `Eof -> () | Error `Fatal e -> Format.eprintf "Error listing archive: %a\n%!" Tar.pp_error e; exit 2 + *) + Ok () in - go None () + let fd = Unix.openfile filename [ Unix.O_RDONLY ] 0 in + match Tar_unix.run (Tar_gz.gzipped (Tar.fold go ())) fd with + | Ok () -> () + | Error (`Unix _) -> + Format.eprintf "Some UNIX error occurred.\n%!" + | Error (`Msg e) -> + Format.eprintf "Some error: %s.\n%!" e + | Error `Unexpected_end_of_file -> + Format.eprintf "Unexpected end of file.\n%!" + | Error `Eof | Error `Gz _ -> + Format.eprintf "Some fatal error occurred.\n%!" + | Error (`Fatal _) -> + Format.eprintf "Some fatal error occurred.\n%!" let () = match Sys.argv with | [| _; "list"; filename; |] when Sys.file_exists filename -> list filename + (* | [| _; directory |] when Sys.is_directory directory -> make directory None | [| _; directory; output |] when Sys.is_directory directory -> make directory (Some output) + *) | _ -> let cmd = Filename.basename Sys.argv.(0) in Format.eprintf "%s []\n%s list \n" cmd cmd -*) diff --git a/lib/tar_gz.ml b/lib/tar_gz.ml index bb102fe..1e39d48 100644 --- a/lib/tar_gz.ml +++ b/lib/tar_gz.ml @@ -126,13 +126,15 @@ let seek_through_gz : decoder -> int -> (int, [> error ]) Tar.t = fun state len let* _buf = really_read_through_gz state len in Tar.return (Ok 0 (* XXX(dinosaure): actually, [fold] ignores the result. *)) -let fold_with_gz f init = +let gzipped t = let rec go : type a. decoder -> (a, [> error ] as 'err) Tar.t -> (a, 'err) Tar.t = fun decoder -> function - | Tar.Really_read len -> really_read_through_gz decoder len + | Tar.Really_read len -> + really_read_through_gz decoder len | Tar.Read _len -> assert false (* XXX(dinosaure): actually does not emit [Tar.Read]. *) | Tar.Seek len -> seek_through_gz decoder len | Tar.Return v -> Tar.return v - | Tar.Bind _ as bind -> bind in + | Tar.Bind (x, f) -> + Tar.Bind (go decoder x, (fun x -> go decoder (f x))) in let decoder = let oc_buffer = De.bigstring_create 0x1000 in { gz= Gz.Inf.decoder `Manual ~o:oc_buffer @@ -140,7 +142,7 @@ let fold_with_gz f init = ; ic_buffer= De.bigstring_create 0x1000 ; tp_length= 0x1000 ; pos= 0 } in - go decoder (Tar.fold f init) + go decoder t (* module Make diff --git a/lib/tar_gz.mli b/lib/tar_gz.mli index 984e874..878dd42 100644 --- a/lib/tar_gz.mli +++ b/lib/tar_gz.mli @@ -16,7 +16,7 @@ type error = [ `Fatal of Tar.error | `Eof | `Gz of string ] -val fold_with_gz : ('a, [> error]) Tar.fold +val gzipped : ('a, ([> error ] as 'err)) Tar.t -> ('a, 'err) Tar.t (* module type READER = sig diff --git a/unix/tar_lwt_unix.ml b/unix/tar_lwt_unix.ml index d80227e..37ec8cb 100644 --- a/unix/tar_lwt_unix.ml +++ b/unix/tar_lwt_unix.ml @@ -60,11 +60,18 @@ let seek fd n = let safe_close fd = Lwt.catch (fun () -> Lwt_unix.close fd) (fun _ -> Lwt.return_unit) -let fold f filename init = +let run t fd = let open Lwt_result.Infix in - safe Lwt_unix.(openfile filename [ O_RDONLY ]) 0 >>= fun fd -> let rec run : type a. (a, [> decode_error ] as 'err) Tar.t -> (a, 'err) result Lwt.t = function - | Tar.Read _ -> assert false (* XXX(dinosaure): [Tar.fold] does not emit [Tar.Read]. *) + | Tar.Read len -> + let b = Bytes.make len '\000' in + safe (Lwt_unix.read fd b 0) len >>= fun read -> + if read = 0 then + Lwt_result.fail `Unexpected_end_of_file + else if len = read then + Lwt_result.return (Bytes.unsafe_to_string b) + else + Lwt_result.return (Bytes.sub_string b 0 read) | Tar.Really_read len -> let buf = Bytes.make len '\000' in read_complete fd buf Tar.Header.length >|= fun () -> @@ -73,8 +80,13 @@ let fold f filename init = | Tar.Return value -> Lwt.return value | Tar.Bind (x, f) -> run x >>= fun value -> run (f value) in + run t + +let fold f filename init = + let open Lwt_result.Infix in + safe Lwt_unix.(openfile filename [ O_RDONLY ]) 0 >>= fun fd -> Lwt.finalize - (fun () -> run (Tar.fold (f fd) init)) + (fun () -> run (Tar.fold (f fd) init) fd) (fun () -> safe_close fd) let unix_err_to_msg = function diff --git a/unix/tar_unix.ml b/unix/tar_unix.ml index 6cfccdb..1e80579 100644 --- a/unix/tar_unix.ml +++ b/unix/tar_unix.ml @@ -58,24 +58,37 @@ let pp_decode_error ppf = function | `Msg msg -> Format.fprintf ppf "Error %s" msg -let fold f filename init = - let* fd = safe Unix.(openfile filename [ O_RDONLY ]) 0 in - let rec run : type a. (a, [> decode_error ] as 'err) Tar.t -> (a, 'err) result = function - | Tar.Read _ -> assert false (* XXX(dinosaure): [Tar.fold] does not emit [Tar.Read]. *) +let run t fd = + let rec run : type a. (a, _ as 'err) Tar.t -> (a, 'err) result = function + | Tar.Read len -> + let b = Bytes.make len '\000' in + let* read = safe (Unix.read fd b 0) len in + if read = 0 then + Error `Unexpected_end_of_file + else if len = read then + Ok (Bytes.unsafe_to_string b) + else + Ok (Bytes.sub_string b 0 read) | Tar.Really_read len -> let buf = Bytes.make len '\000' in begin match read_complete fd buf len with | Ok () -> Ok (Bytes.unsafe_to_string buf) | Error _ as err -> err end - | Tar.Seek len -> seek fd len - | Tar.Return value -> value + | Tar.Seek len -> + seek fd len + | Tar.Return value -> + value | Tar.Bind (x, f) -> match run x with | Ok value -> run (f value) | Error _ as err -> err in + run t + +let fold f filename init = + let* fd = safe Unix.(openfile filename [ O_RDONLY ]) 0 in Fun.protect ~finally:(fun () -> safe_close fd) - (fun () -> run (Tar.fold (f fd) init)) + (fun () -> run (Tar.fold (f fd) init) fd) let unix_err_to_msg = function | `Unix (e, f, s) -> diff --git a/unix/tar_unix.mli b/unix/tar_unix.mli index 11bf94c..d559be7 100644 --- a/unix/tar_unix.mli +++ b/unix/tar_unix.mli @@ -25,6 +25,8 @@ type decode_error = [ val pp_decode_error : Format.formatter -> decode_error -> unit +val run : ('a, [> decode_error ] as 'b) Tar.t -> Unix.file_descr -> ('a, 'b) result + (** [fold f filename acc] folds over the tar archive. The function [f] is called for each [hdr : Tar.Header.t]. It should forward the position in the file descriptor by [hdr.Tar.Header.file_size]. *) From d5ad1dfaf8bf3df9f505209705aa5f22bf9d7876 Mon Sep 17 00:00:00 2001 From: Calascibetta Romain Date: Wed, 7 Feb 2024 17:16:02 +0100 Subject: [PATCH 18/34] Fix the otar binary --- bin/otar.ml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bin/otar.ml b/bin/otar.ml index d57ce57..d9a0395 100644 --- a/bin/otar.ml +++ b/bin/otar.ml @@ -94,7 +94,6 @@ let list filename = let padding = Tar.Header.compute_zero_padding_length hdr in let data = Int64.to_int hdr.Tar.Header.file_size in let to_skip = data + padding in *) - let to_skip = Tar.Header.(Int64.to_int (to_sectors hdr) * length) in Tar_gz.skip ic to_skip ; go global () | Error `Eof -> () @@ -102,7 +101,10 @@ let list filename = Format.eprintf "Error listing archive: %a\n%!" Tar.pp_error e; exit 2 *) - Ok () + let open Tar in + let to_skip = Header.(Int64.to_int (to_sectors hdr) * length) in + let* _ = seek to_skip in + return (Ok ()) in let fd = Unix.openfile filename [ Unix.O_RDONLY ] 0 in match Tar_unix.run (Tar_gz.gzipped (Tar.fold go ())) fd with From c7c81d2e5e17328dd1979d779964d576c67dd61c Mon Sep 17 00:00:00 2001 From: Calascibetta Romain Date: Wed, 7 Feb 2024 17:16:38 +0100 Subject: [PATCH 19/34] Implement the high kind polymorphism to fix the lwt_unix layer --- lib/tar.ml | 19 +++--- lib/tar.mli | 35 ++++++----- lib/tar_gz.ml | 11 ++-- lib/tar_gz.mli | 2 +- unix/tar_lwt_unix.ml | 141 ++++++++++++++++++++++++------------------ unix/tar_lwt_unix.mli | 10 ++- unix/tar_unix.ml | 115 +++++++++++++++++++++------------- unix/tar_unix.mli | 11 ++-- 8 files changed, 203 insertions(+), 141 deletions(-) diff --git a/lib/tar.ml b/lib/tar.ml index 2666112..078deeb 100644 --- a/lib/tar.ml +++ b/lib/tar.ml @@ -816,12 +816,15 @@ let encode_header ?level header = let encode_global_extended_header ?level global = encode_extended_header ?level `Global global -type ('a, 'err) t = - | Really_read : int -> (string, 'err) t - | Read : int -> (string, 'err) t - | Seek : int -> (int, 'err) t - | Bind : ('a, 'err) t * ('a -> ('b, 'err) t) -> ('b, 'err) t - | Return : ('a, 'err) result -> ('a, 'err) t +type ('a, 't) io + +type ('a, 'err, 't) t = + | Really_read : int -> (string, 'err, 't) t + | Read : int -> (string, 'err, 't) t + | Seek : int -> (int, 'err, 't) t + | Bind : ('a, 'err, 't) t * ('a -> ('b, 'err, 't) t) -> ('b, 'err, 't) t + | Return : ('a, 'err) result -> ('a, 'err, 't) t + | High : (('a, 'err) result, 't) io -> ('a, 'err, 't) t let ( let* ) x f = Bind (x, f) let return x = Return x @@ -829,7 +832,7 @@ let really_read n = Really_read n let read n = Read n let seek n = Seek n -type ('a, 'err) fold = (?global:Header.Extended.t -> Header.t -> 'a -> ('a, 'err) result) -> 'a -> ('a, 'err) t +type ('a, 'err, 't) fold = (?global:Header.Extended.t -> Header.t -> 'a -> ('a, 'err, 't) t) -> 'a -> ('a, 'err, 't) t let fold f init = let rec go t ?global ?data acc = @@ -839,7 +842,7 @@ let fold f init = match decode t data with | Ok (t, Some `Header hdr, g) -> let global = Option.fold ~none:global ~some:(fun g -> Some g) g in - let* acc' = return (f ?global hdr acc) in + let* acc' = f ?global hdr acc in let* _off = seek (Header.compute_zero_padding_length hdr) in go t ?global acc' | Ok (t, Some `Skip n, g) -> diff --git a/lib/tar.mli b/lib/tar.mli index e6b5769..4e5ae78 100644 --- a/lib/tar.mli +++ b/lib/tar.mli @@ -173,19 +173,22 @@ val encode_global_extended_header : ?level:Header.compatibility -> Header.Extend (** {1 Pure implementation of [fold].} *) -type ('a, 'err) t = - | Really_read : int -> (string, 'err) t - | Read : int -> (string, 'err) t - | Seek : int -> (int, 'err) t - | Bind : ('a, 'err) t * ('a -> ('b, 'err) t) -> ('b, 'err) t - | Return : ('a, 'err) result -> ('a, 'err) t - -val really_read : int -> (string, _) t -val read : int -> (string, _) t -val seek : int -> (int, _) t -val ( let* ) : ('a, 'err) t -> ('a -> ('b, 'err) t) -> ('b, 'err) t -val return : ('a, 'err) result -> ('a, 'err) t - -type ('a, 'err) fold = (?global:Header.Extended.t -> Header.t -> 'a -> ('a, 'err) result) -> 'a -> ('a, 'err) t - -val fold : ('a, [> `Fatal of error ]) fold +type ('a, 't) io + +type ('a, 'err, 't) t = + | Really_read : int -> (string, 'err, 't) t + | Read : int -> (string, 'err, 't) t + | Seek : int -> (int, 'err, 't) t + | Bind : ('a, 'err, 't) t * ('a -> ('b, 'err, 't) t) -> ('b, 'err, 't) t + | Return : ('a, 'err) result -> ('a, 'err, 't) t + | High : (('a, 'err) result, 't) io -> ('a, 'err, 't) t + +val really_read : int -> (string, _, _) t +val read : int -> (string, _, _) t +val seek : int -> (int, _, _) t +val ( let* ) : ('a, 'err, 't) t -> ('a -> ('b, 'err, 't) t) -> ('b, 'err, 't) t +val return : ('a, 'err) result -> ('a, 'err, _) t + +type ('a, 'err, 't) fold = (?global:Header.Extended.t -> Header.t -> 'a -> ('a, 'err, 't) t) -> 'a -> ('a, 'err, 't) t + +val fold : ('a, [> `Fatal of error ], 't) fold diff --git a/lib/tar_gz.ml b/lib/tar_gz.ml index 1e39d48..fb13ad9 100644 --- a/lib/tar_gz.ml +++ b/lib/tar_gz.ml @@ -75,7 +75,7 @@ type decoder = ; mutable pos : int } let really_read_through_gz - : decoder -> bytes -> (unit, 'err) Tar.t + : decoder -> bytes -> (unit, 'err, _) Tar.t = fun ({ ic_buffer; oc_buffer; tp_length; _ } as state) res -> let open Tar in let rec until_full_or_end gz (res, res_off, res_len) = @@ -121,20 +121,21 @@ let really_read_through_gz decoder len = type error = [ `Fatal of Tar.error | `Eof | `Gz of string ] -let seek_through_gz : decoder -> int -> (int, [> error ]) Tar.t = fun state len -> +let seek_through_gz : decoder -> int -> (int, [> error ], _) Tar.t = fun state len -> let open Tar in let* _buf = really_read_through_gz state len in Tar.return (Ok 0 (* XXX(dinosaure): actually, [fold] ignores the result. *)) let gzipped t = - let rec go : type a. decoder -> (a, [> error ] as 'err) Tar.t -> (a, 'err) Tar.t = fun decoder -> function + let rec go : type a. decoder -> (a, [> error ] as 'err, 't) Tar.t -> (a, 'err, 't) Tar.t = fun decoder -> function | Tar.Really_read len -> really_read_through_gz decoder len | Tar.Read _len -> assert false (* XXX(dinosaure): actually does not emit [Tar.Read]. *) | Tar.Seek len -> seek_through_gz decoder len - | Tar.Return v -> Tar.return v + | Tar.Return _ as ret -> ret | Tar.Bind (x, f) -> - Tar.Bind (go decoder x, (fun x -> go decoder (f x))) in + Tar.Bind (go decoder x, (fun x -> go decoder (f x))) + | Tar.High _ as high -> high in let decoder = let oc_buffer = De.bigstring_create 0x1000 in { gz= Gz.Inf.decoder `Manual ~o:oc_buffer diff --git a/lib/tar_gz.mli b/lib/tar_gz.mli index 878dd42..846b2c3 100644 --- a/lib/tar_gz.mli +++ b/lib/tar_gz.mli @@ -16,7 +16,7 @@ type error = [ `Fatal of Tar.error | `Eof | `Gz of string ] -val gzipped : ('a, ([> error ] as 'err)) Tar.t -> ('a, 'err) Tar.t +val gzipped : ('a, ([> error ] as 'err), 't) Tar.t -> ('a, 'err, 't) Tar.t (* module type READER = sig diff --git a/unix/tar_lwt_unix.ml b/unix/tar_lwt_unix.ml index 37ec8cb..4fdd244 100644 --- a/unix/tar_lwt_unix.ml +++ b/unix/tar_lwt_unix.ml @@ -60,9 +60,27 @@ let seek fd n = let safe_close fd = Lwt.catch (fun () -> Lwt_unix.close fd) (fun _ -> Lwt.return_unit) +module High : sig + type t + type 'a s = 'a Lwt.t + + external inj : 'a s -> ('a, t) Tar.io = "%identity" + external prj : ('a, t) Tar.io -> 'a s = "%identity" +end = struct + type t + type 'a s = 'a Lwt.t + + external inj : 'a -> 'b = "%identity" + external prj : 'a -> 'b = "%identity" +end + +type t = High.t + +let value v = Tar.High (High.inj v) + let run t fd = let open Lwt_result.Infix in - let rec run : type a. (a, [> decode_error ] as 'err) Tar.t -> (a, 'err) result Lwt.t = function + let rec run : type a. (a, [> decode_error ] as 'err, t) Tar.t -> (a, 'err) result Lwt.t = function | Tar.Read len -> let b = Bytes.make len '\000' in safe (Lwt_unix.read fd b 0) len >>= fun read -> @@ -78,6 +96,7 @@ let run t fd = Bytes.unsafe_to_string buf | Tar.Seek len -> seek fd len | Tar.Return value -> Lwt.return value + | Tar.High value -> High.prj value | Tar.Bind (x, f) -> run x >>= fun value -> run (f value) in run t @@ -86,7 +105,7 @@ let fold f filename init = let open Lwt_result.Infix in safe Lwt_unix.(openfile filename [ O_RDONLY ]) 0 >>= fun fd -> Lwt.finalize - (fun () -> run (Tar.fold (f fd) init) fd) + (fun () -> run (Tar.fold f init) fd) (fun () -> safe_close fd) let unix_err_to_msg = function @@ -94,73 +113,52 @@ let unix_err_to_msg = function `Msg (Format.sprintf "error %s in function %s %s" (Unix.error_message e) f s) -let copy ~src_fd ~dst_fd len = +let copy ~dst_fd len = let open Lwt_result.Infix in let blen = 65536 in - let buffer = Bytes.make blen '\000' in - let rec read_write ~src_fd ~dst_fd len = - if len = 0 then - Lwt.return (Ok ()) + let rec read_write ~dst_fd len = + if len = 0 then value (Lwt.return (Ok ())) else - let l = min blen len in - Lwt_result.map_error - (function - | `Unix _ as e -> unix_err_to_msg e - | `Unexpected_end_of_file -> - `Msg "Unexpected end of file") - (read_complete src_fd buffer l) >>= fun () -> - Lwt_result.map_error unix_err_to_msg - (safe (Lwt_unix.write dst_fd buffer 0) l) >>= fun _written -> - read_write ~src_fd ~dst_fd (len - l) + let open Tar in + let slen = min blen len in + let* str = Tar.really_read slen in + let* _written = Lwt_result.map_error unix_err_to_msg + (safe (Lwt_unix.write_string dst_fd str 0) slen) |> value in + read_write ~dst_fd (len - slen) in - read_write ~src_fd ~dst_fd len + read_write ~dst_fd len let extract ?(filter = fun _ -> true) ~src dst = + let safe_close fd = + let open Lwt.Infix in + Lwt.catch + (fun () -> Lwt_unix.close fd) + (fun _ -> Lwt.return_unit) + >|= Result.ok in let open Lwt_result.Infix in - let f fd hdr = - if filter hdr then - match hdr.Tar.Header.link_indicator with - | Tar.Header.Link.Normal -> - Lwt_result.map_error unix_err_to_msg - (safe Lwt_unix.(openfile (Filename.concat dst hdr.Tar.Header.file_name) - [ O_WRONLY ; O_CREAT ]) hdr.Tar.Header.file_mode) >>= fun dst -> - Lwt.finalize - (fun () -> copy ~src_fd:fd ~dst_fd:dst (Int64.to_int hdr.Tar.Header.file_size)) - (fun () -> safe_close dst) - (* TODO set owner / mode / mtime etc. *) - | _ -> - (* TODO handle directories, links, etc. *) - Lwt_result.map_error unix_err_to_msg - (seek fd (Int64.to_int hdr.Tar.Header.file_size)) >|= fun _off -> - () - else - Lwt_result.map_error unix_err_to_msg - (seek fd (Int64.to_int hdr.Tar.Header.file_size)) >|= fun _off -> - () + let f ?global:_ hdr () = + let open Tar in + match filter hdr, hdr.Tar.Header.link_indicator with + | true, Tar.Header.Link.Normal -> + let open Tar in + let* dst = Lwt_result.map_error + unix_err_to_msg + (safe Lwt_unix.(openfile (Filename.concat dst hdr.Tar.Header.file_name) [ O_WRONLY; O_CREAT ]) hdr.Tar.Header.file_mode) + |> value in + begin try + let* () = copy ~dst_fd:dst (Int64.to_int hdr.Tar.Header.file_size) in + let* () = value (safe_close dst) in + return (Ok ()) + with exn -> + let* () = value (safe_close dst) in + return (Error (`Exn exn)) + end + | _ -> + let open Tar in + let* _off = Tar.seek (Int64.to_int hdr.Tar.Header.file_size) in + return (Ok ()) in - (* XXX(dinosaure): the lwt logic to ignore the ['a Lwt.t]. *) - let open Lwt.Infix in - let queue = Queue.create () in - let f fd ?global:_ hdr () = - let th = f fd hdr in - Queue.add th queue; - Ok () in - let stop, do_stop = Lwt.task () in - let consume queue = match Queue.take_opt queue with - | Some th -> Lwt.return (`Thread th) - | None -> Lwt.return `Yield in - let rec join () = - Lwt.pick [ stop; consume queue ] >>= function - | `Stop -> Lwt.return_unit - | `Thread th -> th >>= fun _ -> join () - (* TODO(dinosaure): we can do the yallop's trick and add a new kind of - value into [Tar.t] to express the ability to resolve an ['a Lwt.t] - value or we can as we do currently what we do here but we ignore all - errors from the user function. *) - | `Yield -> Lwt.pause () >>= join in - let fold () = fold f src () >|= fun acc -> - Lwt.wakeup do_stop `Stop; acc in - Lwt.both (fold ()) (join ()) >|= fun (acc, ()) -> acc + fold f src () (** Return the header needed for a particular file on disk *) let header_of_file ?level file = @@ -215,6 +213,27 @@ let write_header ?level header fd = Lwt_result.lift (Tar.encode_header ?level header) >>= fun header_strings -> write_strings fd header_strings +let copy ~src_fd ~dst_fd len = + let open Lwt_result.Infix in + let blen = 65536 in + let buffer = Bytes.make blen '\000' in + let rec read_write ~src_fd ~dst_fd len = + if len = 0 then + Lwt.return (Ok ()) + else + let l = min blen len in + Lwt_result.map_error + (function + | `Unix _ as e -> unix_err_to_msg e + | `Unexpected_end_of_file -> + `Msg "Unexpected end of file") + (read_complete src_fd buffer l) >>= fun () -> + Lwt_result.map_error unix_err_to_msg + (safe (Lwt_unix.write dst_fd buffer 0) l) >>= fun _written -> + read_write ~src_fd ~dst_fd (len - l) + in + read_write ~src_fd ~dst_fd len + let append_file ?level ?header filename fd = let open Lwt_result.Infix in (match header with diff --git a/unix/tar_lwt_unix.mli b/unix/tar_lwt_unix.mli index 7a177fb..106a9dc 100644 --- a/unix/tar_lwt_unix.mli +++ b/unix/tar_lwt_unix.mli @@ -25,12 +25,16 @@ type decode_error = [ val pp_decode_error : Format.formatter -> decode_error -> unit +type t + +val value : ('a, 'err) result Lwt.t -> ('a, 'err, t) Tar.t + (** [fold f filename acc] folds over the tar archive. The function [f] is called for each [hdr : Tar.Header.t]. It should forward the position in the file descriptor by [hdr.Tar.Header.file_size]. *) val fold : - (Lwt_unix.file_descr -> ?global:Tar.Header.Extended.t -> Tar.Header.t -> 'a -> - ('a, [> decode_error ] as 'err) result) -> + (?global:Tar.Header.Extended.t -> Tar.Header.t -> 'a -> + ('a, [> decode_error ] as 'err, t) Tar.t) -> string -> 'a -> ('a, 'err) result Lwt.t (** [extract ~filter ~src dst] extracts the tar archive [src] into the @@ -40,7 +44,7 @@ val fold : val extract : ?filter:(Tar.Header.t -> bool) -> src:string -> string -> - (unit, decode_error) result Lwt.t + (unit, [> `Exn of exn | decode_error ]) result Lwt.t (** [create ~level ~filter ~src dst] creates a tar archive at [dst]. It uses [src], a directory name, as input. If [filter] is provided diff --git a/unix/tar_unix.ml b/unix/tar_unix.ml index 1e80579..55f9999 100644 --- a/unix/tar_unix.ml +++ b/unix/tar_unix.ml @@ -58,8 +58,26 @@ let pp_decode_error ppf = function | `Msg msg -> Format.fprintf ppf "Error %s" msg +module High : sig + type t + type 'a s = 'a + + external inj : 'a s -> ('a, t) Tar.io = "%identity" + external prj : ('a, t) Tar.io -> 'a s = "%identity" +end = struct + type t + type 'a s = 'a + + external inj : 'a -> 'b = "%identity" + external prj : 'a -> 'b = "%identity" +end + +type t = High.t + +let value v = Tar.High (High.inj v) + let run t fd = - let rec run : type a. (a, _ as 'err) Tar.t -> (a, 'err) result = function + let rec run : type a. (a, _ as 'err, t) Tar.t -> (a, 'err) result = function | Tar.Read len -> let b = Bytes.make len '\000' in let* read = safe (Unix.read fd b 0) len in @@ -74,10 +92,9 @@ let run t fd = begin match read_complete fd buf len with | Ok () -> Ok (Bytes.unsafe_to_string buf) | Error _ as err -> err end - | Tar.Seek len -> - seek fd len - | Tar.Return value -> - value + | Tar.Seek len -> seek fd len + | Tar.Return value -> value + | Tar.High value -> High.prj value | Tar.Bind (x, f) -> match run x with | Ok value -> run (f value) @@ -88,63 +105,52 @@ let fold f filename init = let* fd = safe Unix.(openfile filename [ O_RDONLY ]) 0 in Fun.protect ~finally:(fun () -> safe_close fd) - (fun () -> run (Tar.fold (f fd) init) fd) + (fun () -> run (Tar.fold f init) fd) let unix_err_to_msg = function | `Unix (e, f, s) -> `Msg (Format.sprintf "error %s in function %s %s" (Unix.error_message e) f s) -let copy ~src_fd ~dst_fd len = +let copy ~dst_fd len = let blen = 65536 in - let buffer = Bytes.make blen '\000' in - let rec read_write ~src_fd ~dst_fd len = - if len = 0 then - Ok () + let rec read_write ~dst_fd len = + let open Tar in + if len = 0 then Tar.return (Ok ()) else - let l = min blen len in - let* () = - Result.map_error - (function - | `Unix _ as e -> unix_err_to_msg e - | `Unexpected_end_of_file -> - `Msg "Unexpected end of file") - (read_complete src_fd buffer l) - in - let* _written = - Result.map_error unix_err_to_msg - (safe (Unix.write dst_fd buffer 0) l) - in - read_write ~src_fd ~dst_fd (len - l) + let slen = min blen len in + let* str = really_read (min blen len) in + safe (Unix.write_substring dst_fd str 0) slen + |> Result.map_error unix_err_to_msg + |> function + | Ok _ -> read_write ~dst_fd (len - slen) + | Error _ as err -> return err in - read_write ~src_fd ~dst_fd len + read_write ~dst_fd len let extract ?(filter = fun _ -> true) ~src dst = - let f fd ?global:_ hdr () = + let f ?global:_ hdr () = if filter hdr then match hdr.Tar.Header.link_indicator with | Tar.Header.Link.Normal -> - let* dst = - Result.map_error unix_err_to_msg + begin match Result.map_error unix_err_to_msg (safe Unix.(openfile (Filename.concat dst hdr.Tar.Header.file_name) - [ O_WRONLY ; O_CREAT ]) hdr.Tar.Header.file_mode) - in - Fun.protect ~finally:(fun () -> safe_close dst) - (fun () -> copy ~src_fd:fd ~dst_fd:dst (Int64.to_int hdr.Tar.Header.file_size)) + [ O_WRONLY ; O_CREAT ]) hdr.Tar.Header.file_mode) with + | Error _ as err -> Tar.return err + | Ok dst -> + try copy ~dst_fd:dst (Int64.to_int hdr.Tar.Header.file_size) + with exn -> safe_close dst; Tar.return (Error (`Exn exn)) + end (* TODO set owner / mode / mtime etc. *) | _ -> (* TODO handle directories, links, etc. *) - let* _off = - Result.map_error unix_err_to_msg - (seek fd (Int64.to_int hdr.Tar.Header.file_size)) - in - Ok () + let open Tar in + let* _off = seek (Int64.to_int hdr.Tar.Header.file_size) in + return (Ok ()) else - let* _off = - Result.map_error unix_err_to_msg - (seek fd (Int64.to_int hdr.Tar.Header.file_size)) - in - Ok () + let open Tar in + let* _off = seek (Int64.to_int hdr.Tar.Header.file_size) in + Tar.return (Ok ()) in fold f src () @@ -196,6 +202,29 @@ let write_header ?level header fd = let* header_strings = Tar.encode_header ?level header in write_strings fd header_strings +let copy ~src_fd ~dst_fd len = + let blen = 65536 in + let buffer = Bytes.make blen '\000' in + let rec read_write ~src_fd ~dst_fd len = + if len = 0 then Ok () + else + let l = min blen len in + let* () = + Result.map_error + (function + | `Unix _ as e -> unix_err_to_msg e + | `Unexpected_end_of_file -> + `Msg "Unexpected end of file") + (read_complete src_fd buffer l) + in + let* _written = + Result.map_error unix_err_to_msg + (safe (Unix.write dst_fd buffer 0) l) + in + read_write ~src_fd ~dst_fd (len - l) + in + read_write ~src_fd ~dst_fd len + let append_file ?level ?header filename fd = let* header = match header with | None -> header_of_file ?level filename diff --git a/unix/tar_unix.mli b/unix/tar_unix.mli index d559be7..509ca9b 100644 --- a/unix/tar_unix.mli +++ b/unix/tar_unix.mli @@ -23,16 +23,19 @@ type decode_error = [ | `Msg of string ] +type t + val pp_decode_error : Format.formatter -> decode_error -> unit -val run : ('a, [> decode_error ] as 'b) Tar.t -> Unix.file_descr -> ('a, 'b) result +val run : ('a, [> decode_error ] as 'b, t) Tar.t -> Unix.file_descr -> ('a, 'b) result +val value : ('a, 'err) result -> ('a, 'err, t) Tar.t (** [fold f filename acc] folds over the tar archive. The function [f] is called for each [hdr : Tar.Header.t]. It should forward the position in the file descriptor by [hdr.Tar.Header.file_size]. *) val fold : - (Unix.file_descr -> ?global:Tar.Header.Extended.t -> Tar.Header.t -> 'a -> - ('a, decode_error) result) -> + (?global:Tar.Header.Extended.t -> Tar.Header.t -> 'a -> + ('a, decode_error, t) Tar.t) -> string -> 'a -> ('a, decode_error) result (** [extract ~filter ~src dst] extracts the tar archive [src] into the @@ -42,7 +45,7 @@ val fold : val extract : ?filter:(Tar.Header.t -> bool) -> src:string -> string -> - (unit, decode_error) result + (unit, [> `Exn of exn | decode_error ]) result (** [create ~level ~filter ~src dst] creates a tar archive at [dst]. It uses [src], a directory name, as input. If [filter] is provided From 906d6dc44b0d11e2cdb104ad252aa7e80c93a5f7 Mon Sep 17 00:00:00 2001 From: Calascibetta Romain Date: Wed, 21 Feb 2024 10:37:39 +0100 Subject: [PATCH 20/34] Add a comment to explain the hkp trick --- lib/tar.mli | 12 +++++++++++- unix/tar_unix.ml | 8 ++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/lib/tar.mli b/lib/tar.mli index 4e5ae78..4a1676b 100644 --- a/lib/tar.mli +++ b/lib/tar.mli @@ -171,7 +171,17 @@ val encode_header : ?level:Header.compatibility -> a list of strings. *) val encode_global_extended_header : ?level:Header.compatibility -> Header.Extended.t -> (string list, [> `Msg of string ]) result -(** {1 Pure implementation of [fold].} *) +(** {1 Pure implementation of [fold].} + + [fold] produces a [('a, 'err, 't) t] value which can be {b evaluated} by + a scheduler (such as [lwt] or [unix]). This value describe when we require + to [Read] (like {!val:Stdlib.input}), [Really_read] (like + {!val:Stdlib.really_read}) and [Seek] (like {!val:Stdlib.seek_in}). + + We can compose these actions with [Bind], [Return] and [High]. The latter + allows you to use a value [('a, 't) io] that comes from the scheduler used - + so you can use an Lwt value (['a Lwt.t]) without depending on Lwt + ([('a, lwt) t]) at this stage. *) type ('a, 't) io diff --git a/unix/tar_unix.ml b/unix/tar_unix.ml index 55f9999..36ac1ce 100644 --- a/unix/tar_unix.ml +++ b/unix/tar_unix.ml @@ -58,6 +58,14 @@ let pp_decode_error ppf = function | `Msg msg -> Format.fprintf ppf "Error %s" msg +(* XXX(dinosaure): This is a trick to pass from a value ['a] to a value + [('a, High.t) Tar.io]. It may seem that the code is "unsafe" but physically + the value remains the same (we mainly want to decorate the type of our value + with new information). For more information on this trick, it is well + described in this research paper: + + https://www.cl.cam.ac.uk/~jdy22/papers/lightweight-higher-kinded-polymorphism.pdf +*) module High : sig type t type 'a s = 'a From b8b4ff6ac46775bf43fc97e1a876025991a7a248 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reynir=20Bj=C3=B6rnsson?= Date: Tue, 7 May 2024 11:37:36 +0200 Subject: [PATCH 21/34] Minor: qualify opens, fix tests --- lib_test/global_extended_headers_test.ml | 12 ++++---- lib_test/parse_test.ml | 35 ++++++++++++++---------- unix/tar_lwt_unix.ml | 14 ++++------ unix/tar_unix.ml | 16 +++++------ 4 files changed, 41 insertions(+), 36 deletions(-) diff --git a/lib_test/global_extended_headers_test.ml b/lib_test/global_extended_headers_test.ml index c130382..ff93383 100644 --- a/lib_test/global_extended_headers_test.ml +++ b/lib_test/global_extended_headers_test.ml @@ -46,24 +46,26 @@ let use_global_extended_headers _test_ctxt = let pp ppf hdr = Fmt.pf ppf "%s" (Tar.Header.Extended.to_detailed_string hdr) in Alcotest.testable (fun ppf hdr -> Fmt.pf ppf "%a" Fmt.(option pp) hdr) ( = ) in - let f _fd ?global hdr idx = + let f ?global hdr idx = + let ( let* ) = Tar.( let* ) in + let* _pos = Tar.seek (Int64.to_int hdr.Tar.Header.file_size) in match idx with | 0 -> Alcotest.check header "expected global header" (Some g0) global; Alcotest.(check int) "expected user" 1000 hdr.Tar.Header.user_id; - Ok 1 + Tar.return (Ok 1) | 1 -> Alcotest.check header "expected global header" (Some g0) global; Alcotest.(check int) "expected user" 2000 hdr.Tar.Header.user_id; - Ok 2 + Tar.return (Ok 2) | 2 -> Alcotest.check header "expected global header" (Some g0) global; Alcotest.(check int) "expected user" 1000 hdr.Tar.Header.user_id; - Ok 3 + Tar.return (Ok 3) | 3 -> Alcotest.check header "expected global header" (Some g1) global; Alcotest.(check int) "expected user" 3000 hdr.Tar.Header.user_id; - Ok 4 + Tar.return (Ok 4) | _ -> Alcotest.fail "too many headers" in match Tar_unix.fold f "test.tar" 0 with diff --git a/lib_test/parse_test.ml b/lib_test/parse_test.ml index 03d0bd9..d0a6469 100644 --- a/lib_test/parse_test.ml +++ b/lib_test/parse_test.ml @@ -32,10 +32,11 @@ module Unix = struct end let list filename = - let f fd ?global:_ hdr acc = + let f ?global:_ hdr acc = print_endline hdr.Tar.Header.file_name; - ignore Unix.(lseek fd (Int64.to_int hdr.Tar.Header.file_size) SEEK_CUR); - Ok (hdr :: acc) + let ( let* ) = Tar.( let* ) in + let* _pos = Tar.seek (Int64.to_int hdr.Tar.Header.file_size) in + Tar.return (Ok (hdr :: acc)) in match Tar_unix.fold f filename [] with | Ok acc -> List.rev acc @@ -44,7 +45,7 @@ let list filename = let pp_header f x = Fmt.pf f "%s" (Tar.Header.to_detailed_string x) let header = Alcotest.testable pp_header ( = ) -let error = Alcotest.testable Tar.pp_error ( = ) +let error : Tar.error Alcotest.testable = Alcotest.testable Tar.pp_error ( = ) let link = Alcotest.testable (Fmt.of_to_string Tar.Header.Link.to_string) ( = ) @@ -165,10 +166,12 @@ let can_list_long_pax_tar () = - Reynir *) let can_list_pax_implicit_dir () = - let f _fd ?global:_ hdr () = + let f ?global:_ hdr () = Alcotest.(check link) "is directory" Tar.Header.Link.Directory hdr.Tar.Header.link_indicator; Alcotest.(check string) "filename is patched" "clearly/a/directory/" hdr.file_name; - Ok () + let ( let* ) = Tar.( let* ) in + let* _pos = Tar.seek (Int64.to_int hdr.file_size) in + Tar.return (Ok ()) in match Tar_unix.fold f "lib_test/pax-shenanigans.tar" () with | Ok () -> () @@ -186,10 +189,12 @@ let can_list_pax_implicit_dir () = Tar.Header.marshal ~level (Cstruct.shift buf 1024) hdr; buf] *) let can_list_longlink_implicit_dir () = - let f _fd ?global:_ hdr () = + let f ?global:_ hdr () = Alcotest.(check link) "is directory" Tar.Header.Link.Directory hdr.Tar.Header.link_indicator; Alcotest.(check string) "filename is patched" "some/long/name/for/a/directory/" hdr.file_name; - Ok () + let ( let* ) = Tar.( let* ) in + let* _pos = Tar.seek (Int64.to_int hdr.file_size) in + Tar.return (Ok ()) in match Tar_unix.fold f "lib_test/long-implicit-dir.tar" () with | Ok () -> () @@ -210,8 +215,9 @@ let can_transform_tar () = let tar_out = Filename.temp_file "tar-transformed" ".tar" in let fd_out = Unix.openfile tar_out [ O_WRONLY; O_CREAT; O_CLOEXEC ] 0o644 in with_tmpdir @@ fun temp_dir -> - let f fd ?global:_ hdr _ = - ignore Unix.(lseek fd (Int64.to_int hdr.Tar.Header.file_size) SEEK_CUR); + let f ?global:_ hdr _ = + let ( let* ) = Tar.( let* ) in + let* _pos = Tar.seek (Int64.to_int hdr.Tar.Header.file_size) in let hdr = { hdr with Tar.Header.file_name = Filename.concat temp_dir hdr.file_name; @@ -219,7 +225,7 @@ let can_transform_tar () = } in match Tar_unix.write_header ~level hdr fd_out with - | Ok () -> Ok () + | Ok () -> Tar.return (Ok ()) | Error _ -> Alcotest.fail "error writing header" in match Tar_unix.fold f tar_in () with @@ -229,11 +235,12 @@ let can_transform_tar () = | Error _ -> Alcotest.fail "couldn't write end" | Ok () -> Unix.close fd_out; - let f fd ?global:_ hdr _ = - ignore Unix.(lseek fd (Int64.to_int hdr.Tar.Header.file_size) SEEK_CUR); + let f ?global:_ hdr _ = + let ( let* ) = Tar.( let* ) in + let* _pos = Tar.seek (Int64.to_int hdr.Tar.Header.file_size) in Alcotest.(check string) "Filename was transformed" temp_dir (String.sub hdr.file_name 0 (min (String.length hdr.file_name) (String.length temp_dir))); - Ok () + Tar.return (Ok ()) in match Tar_unix.fold f tar_out () with | Error e -> Alcotest.failf "error folding2 %a" Tar_unix.pp_decode_error e diff --git a/unix/tar_lwt_unix.ml b/unix/tar_lwt_unix.ml index 4fdd244..786a449 100644 --- a/unix/tar_lwt_unix.ml +++ b/unix/tar_lwt_unix.ml @@ -114,12 +114,11 @@ let unix_err_to_msg = function (Unix.error_message e) f s) let copy ~dst_fd len = - let open Lwt_result.Infix in let blen = 65536 in let rec read_write ~dst_fd len = if len = 0 then value (Lwt.return (Ok ())) else - let open Tar in + let ( let* ) = Tar.( let* ) in let slen = min blen len in let* str = Tar.really_read slen in let* _written = Lwt_result.map_error unix_err_to_msg @@ -135,12 +134,10 @@ let extract ?(filter = fun _ -> true) ~src dst = (fun () -> Lwt_unix.close fd) (fun _ -> Lwt.return_unit) >|= Result.ok in - let open Lwt_result.Infix in let f ?global:_ hdr () = - let open Tar in + let ( let* ) = Tar.( let* ) in match filter hdr, hdr.Tar.Header.link_indicator with | true, Tar.Header.Link.Normal -> - let open Tar in let* dst = Lwt_result.map_error unix_err_to_msg (safe Lwt_unix.(openfile (Filename.concat dst hdr.Tar.Header.file_name) [ O_WRONLY; O_CREAT ]) hdr.Tar.Header.file_mode) @@ -148,15 +145,14 @@ let extract ?(filter = fun _ -> true) ~src dst = begin try let* () = copy ~dst_fd:dst (Int64.to_int hdr.Tar.Header.file_size) in let* () = value (safe_close dst) in - return (Ok ()) + Tar.return (Ok ()) with exn -> let* () = value (safe_close dst) in - return (Error (`Exn exn)) + Tar.return (Error (`Exn exn)) end | _ -> - let open Tar in let* _off = Tar.seek (Int64.to_int hdr.Tar.Header.file_size) in - return (Ok ()) + Tar.return (Ok ()) in fold f src () diff --git a/unix/tar_unix.ml b/unix/tar_unix.ml index 36ac1ce..84bd756 100644 --- a/unix/tar_unix.ml +++ b/unix/tar_unix.ml @@ -123,16 +123,16 @@ let unix_err_to_msg = function let copy ~dst_fd len = let blen = 65536 in let rec read_write ~dst_fd len = - let open Tar in + let ( let* ) = Tar.( let* ) in if len = 0 then Tar.return (Ok ()) else let slen = min blen len in - let* str = really_read (min blen len) in + let* str = Tar.really_read (min blen len) in safe (Unix.write_substring dst_fd str 0) slen |> Result.map_error unix_err_to_msg |> function | Ok _ -> read_write ~dst_fd (len - slen) - | Error _ as err -> return err + | Error _ as err -> Tar.return err in read_write ~dst_fd len @@ -152,12 +152,12 @@ let extract ?(filter = fun _ -> true) ~src dst = (* TODO set owner / mode / mtime etc. *) | _ -> (* TODO handle directories, links, etc. *) - let open Tar in - let* _off = seek (Int64.to_int hdr.Tar.Header.file_size) in - return (Ok ()) + let ( let* ) = Tar.( let* ) in + let* _off = Tar.seek (Int64.to_int hdr.Tar.Header.file_size) in + Tar.return (Ok ()) else - let open Tar in - let* _off = seek (Int64.to_int hdr.Tar.Header.file_size) in + let ( let* ) = Tar.( let* ) in + let* _off = Tar.seek (Int64.to_int hdr.Tar.Header.file_size) in Tar.return (Ok ()) in fold f src () From 0cfd771a75c424504d2742a1d91278d61eaa5f5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reynir=20Bj=C3=B6rnsson?= Date: Tue, 7 May 2024 17:11:43 +0200 Subject: [PATCH 22/34] Partially implement tar_eio, stub out remainder --- eio/tar_eio.ml | 108 ++++++++++++++++++++++++++++++++++-------------- eio/tar_eio.mli | 50 +++++++++++++++++----- 2 files changed, 116 insertions(+), 42 deletions(-) diff --git a/eio/tar_eio.ml b/eio/tar_eio.ml index d1d07c3..60d6c68 100644 --- a/eio/tar_eio.ml +++ b/eio/tar_eio.ml @@ -18,42 +18,63 @@ open Eio -module Monad = struct - type 'a t = 'a - let (>>=) a f = f a - let return = Fun.id -end +module High : sig + type t + type 'a s = 'a -module Io = struct - type in_channel = Flow.source - type 'a io = 'a - let really_read f b = - let len = Bytes.length b in - let cs = Cstruct.create len in - Flow.read_exact f cs; - Cstruct.blit_to_bytes cs 0 b 0 len - let skip f (n: int) = - let buffer_size = 32768 in - let buffer = Cstruct.create buffer_size in - let rec loop (n: int) = - if n <= 0 then () - else - let amount = min n buffer_size in - let block = Cstruct.sub buffer 0 amount in - Flow.read_exact f block; - loop (n - amount) in - loop n + external inj : 'a s -> ('a, t) Tar.io = "%identity" + external prj : ('a, t) Tar.io -> 'a s = "%identity" +end = struct + type t + type 'a s = 'a - type out_channel = Flow.sink - let really_write f str = Flow.write f [ Cstruct.of_string str ] + external inj : 'a -> 'b = "%identity" + external prj : 'b -> 'a = "%identity" end -let really_read = Flow.read_exact -let skip = Io.skip -let really_write f b = Flow.write f [ b ] +type t = High.t + +let value v = Tar.High (High.inj v) -module HeaderReader = Tar.HeaderReader(Monad)(Io) -module HeaderWriter = Tar.HeaderWriter(Monad)(Io) +let run t f = + let rec run : type a. (a, 'err, t) Tar.t -> (a, 'err) result = function + | Tar.Read len -> + let b = Cstruct.create len in + (match Flow.single_read f b with + | len -> + Ok (Cstruct.to_string ~len b) + | exception End_of_file -> + (* XXX: should we catch other exceptions?! *) + Error `Unexpected_end_of_file) + | Tar.Really_read len -> + let b = Cstruct.create len in + (try + Flow.read_exact f b; + Ok (Cstruct.to_string b) + with End_of_file -> Error `Unexpected_end_of_file) + | Tar.Seek n -> + let buffer_size = 32768 in + let buffer = Cstruct.create buffer_size in + let rec loop (n: int) = + if n <= 0 then Ok (-1) (* XXX: I dunno... *) + else + let amount = min n buffer_size in + let block = Cstruct.sub buffer 0 amount in + Flow.read_exact f block; + loop (n - amount) in + loop n + | Tar.Return value -> value + | Tar.High value -> High.prj value + | Tar.Bind (x, f) -> + match run x with + | Ok value -> run (f value) + | Error _ as err -> err in + run t + +let fold f filename init = + (* XXX(reynir): ??? *) + Eio.Path.with_open_in filename + (run (Tar.fold f init)) (* Eio needs a non-file-opening stat. *) let stat path = @@ -79,3 +100,28 @@ let header_of_file ?level ?getpwuid ?getgrgid filepath : Tar.Header.t = let devminor = if level = Ustar then stat.rdev |> Int64.to_int else 0 in Tar.Header.make ~file_mode ~user_id ~group_id ~mod_time ~link_indicator ~link_name ?uname ?gname ~devmajor ~devminor (snd filepath) file_size +let extract ?filter:(_ = fun _ -> true) ~src:_ _dst = + (* TODO *) + failwith "TODO" + +let create ?level:_ ?global:_ ?filter:(_ = fun _ -> true) ~src:_ _dst = + (* TODO *) + failwith "TODO" + +let append_file ?level:_ ?header:_ _filename _dst = + (* TODO *) + failwith "TODO" + +let write_header ?level:_ _hdr _fl = + (* TODO *) + failwith "TODO" + +let write_global_extended_header ?level:_ _global _fl = + (* TODO *) + failwith "TODO" + +let write_end fl = + let zero_block = Cstruct.of_string Tar.Header.zero_block in + (* TODO: catch exceptions?! *) + Eio.Flow.write fl [ zero_block; zero_block ]; + Ok () diff --git a/eio/tar_eio.mli b/eio/tar_eio.mli index 3c44aea..0b738b6 100644 --- a/eio/tar_eio.mli +++ b/eio/tar_eio.mli @@ -16,17 +16,20 @@ (** I/O for tar-formatted data *) -val really_read: Eio.Flow.source -> Cstruct.t -> unit -(** [really_read fd buf] fills [buf] with data from [fd] or fails - with {!Stdlib.End_of_file}. *) +type t -val really_write: Eio.Flow.sink -> Cstruct.t -> unit -(** [really_write fd buf] writes the full contents of [buf] to - [fd] or fails with {!Stdlib.End_of_file}. *) +val value : ('a, 'err) result -> ('a, 'err, t) Tar.t -val skip : Eio.Flow.source -> int -> unit -(** [skip fd n] reads [n] bytes from [fd] and discards them. If possible, you - should use [Lwt_unix.lseek fd n Lwt_unix.SEEK_CUR] instead. *) +val run : ('a, [> `Unexpected_end_of_file] as 'b, t) Tar.t -> Eio.Flow.source -> ('a, 'b) result + +val fold : + (?global:Tar.Header.Extended.t -> + Tar.Header.t -> + 'a -> + ('a, [> `Fatal of Tar.error | `Unexpected_end_of_file ] as 'b, t) Tar.t) -> + Eio.Fs.dir Eio.Path.t -> + 'a -> + ('a, 'b) result (** Return the header needed for a particular file on disk. [getpwuid] and [getgrgid] are optional functions that should take the uid and gid respectively and return the passwd and group entry @@ -38,5 +41,30 @@ val header_of_file : Eio.Fs.dir Eio.Path.t -> Tar.Header.t -module HeaderReader : Tar.HEADERREADER with type in_channel = Eio.Flow.source and type 'a io = 'a -module HeaderWriter : Tar.HEADERWRITER with type out_channel = Eio.Flow.sink and type 'a io = 'a +val extract : ?filter:(Tar.Header.t -> bool) -> + src:Eio.Fs.dir Eio.Path.t -> + Eio.Fs.dir Eio.Path.t -> + (unit, _) result + +val create : ?level:Tar.Header.compatibility -> + ?global:Tar.Header.Extended.t -> + ?filter:(Tar.Header.t -> bool) -> + src:Eio.Fs.dir Eio.Path.t -> + Eio.Fs.dir Eio.Path.t -> + (unit, _) result + +val append_file : ?level:Tar.Header.compatibility -> + ?header:Tar.Header.t -> + Eio.Fs.dir Eio.Path.t -> + Eio.Flow.sink -> + (unit, _) result + +val write_header : ?level:Tar.Header.compatibility -> + Tar.Header.t -> Eio.Flow.sink -> + (unit, _) result + +val write_global_extended_header : ?level:Tar.Header.compatibility -> + Tar.Header.Extended.t -> Eio.Flow.sink -> + (unit, _) result + +val write_end : Eio.Flow.sink -> (unit, _) result From c24cd1bca1296c520b475ec00a150bf0f4ea0be9 Mon Sep 17 00:00:00 2001 From: Robur Date: Thu, 9 May 2024 07:55:41 +0000 Subject: [PATCH 23/34] Seek returns unit, improve documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The position is not always possible to keep track of, and is not very useful to begin with. The documentation better explains the lightweight higher kinded types trick. Co-authored-by: Calascibetta Romain Co-authored-by: Reynir Björnsson --- lib/tar.ml | 6 +++--- lib/tar.mli | 10 +++++++--- lib/tar_gz.ml | 4 ++-- unix/tar_lwt_unix.ml | 3 ++- unix/tar_unix.ml | 9 +++++---- 5 files changed, 19 insertions(+), 13 deletions(-) diff --git a/lib/tar.ml b/lib/tar.ml index 078deeb..2fa3875 100644 --- a/lib/tar.ml +++ b/lib/tar.ml @@ -821,7 +821,7 @@ type ('a, 't) io type ('a, 'err, 't) t = | Really_read : int -> (string, 'err, 't) t | Read : int -> (string, 'err, 't) t - | Seek : int -> (int, 'err, 't) t + | Seek : int -> (unit, 'err, 't) t | Bind : ('a, 'err, 't) t * ('a -> ('b, 'err, 't) t) -> ('b, 'err, 't) t | Return : ('a, 'err) result -> ('a, 'err, 't) t | High : (('a, 'err) result, 't) io -> ('a, 'err, 't) t @@ -843,11 +843,11 @@ let fold f init = | Ok (t, Some `Header hdr, g) -> let global = Option.fold ~none:global ~some:(fun g -> Some g) g in let* acc' = f ?global hdr acc in - let* _off = seek (Header.compute_zero_padding_length hdr) in + let* () = seek (Header.compute_zero_padding_length hdr) in go t ?global acc' | Ok (t, Some `Skip n, g) -> let global = Option.fold ~none:global ~some:(fun g -> Some g) g in - let* _off = seek n in + let* () = seek n in go t ?global acc | Ok (t, Some `Read n, g) -> let global = Option.fold ~none:global ~some:(fun g -> Some g) g in diff --git a/lib/tar.mli b/lib/tar.mli index 4a1676b..17cf93f 100644 --- a/lib/tar.mli +++ b/lib/tar.mli @@ -181,21 +181,25 @@ val encode_global_extended_header : ?level:Header.compatibility -> Header.Extend We can compose these actions with [Bind], [Return] and [High]. The latter allows you to use a value [('a, 't) io] that comes from the scheduler used - so you can use an Lwt value (['a Lwt.t]) without depending on Lwt - ([('a, lwt) t]) at this stage. *) + ([('a, lwt) t]) at this stage. + + For further informations, you can look at the paper about Lightweight + Higher Kind Polymorphism available + {{:https://www.cl.cam.ac.uk/~jdy22/papers/lightweight-higher-kinded-polymorphism.pdf} here}. *) type ('a, 't) io type ('a, 'err, 't) t = | Really_read : int -> (string, 'err, 't) t | Read : int -> (string, 'err, 't) t - | Seek : int -> (int, 'err, 't) t + | Seek : int -> (unit, 'err, 't) t | Bind : ('a, 'err, 't) t * ('a -> ('b, 'err, 't) t) -> ('b, 'err, 't) t | Return : ('a, 'err) result -> ('a, 'err, 't) t | High : (('a, 'err) result, 't) io -> ('a, 'err, 't) t val really_read : int -> (string, _, _) t val read : int -> (string, _, _) t -val seek : int -> (int, _, _) t +val seek : int -> (unit, _, _) t val ( let* ) : ('a, 'err, 't) t -> ('a -> ('b, 'err, 't) t) -> ('b, 'err, 't) t val return : ('a, 'err) result -> ('a, 'err, _) t diff --git a/lib/tar_gz.ml b/lib/tar_gz.ml index fb13ad9..f3ce0c8 100644 --- a/lib/tar_gz.ml +++ b/lib/tar_gz.ml @@ -121,10 +121,10 @@ let really_read_through_gz decoder len = type error = [ `Fatal of Tar.error | `Eof | `Gz of string ] -let seek_through_gz : decoder -> int -> (int, [> error ], _) Tar.t = fun state len -> +let seek_through_gz : decoder -> int -> (unit, [> error ], _) Tar.t = fun state len -> let open Tar in let* _buf = really_read_through_gz state len in - Tar.return (Ok 0 (* XXX(dinosaure): actually, [fold] ignores the result. *)) + Tar.return (Ok ()) let gzipped t = let rec go : type a. decoder -> (a, [> error ] as 'err, 't) Tar.t -> (a, 'err, 't) Tar.t = fun decoder -> function diff --git a/unix/tar_lwt_unix.ml b/unix/tar_lwt_unix.ml index 786a449..7de8215 100644 --- a/unix/tar_lwt_unix.ml +++ b/unix/tar_lwt_unix.ml @@ -56,6 +56,7 @@ let read_complete fd buf len = let seek fd n = safe (Lwt_unix.lseek fd n) Unix.SEEK_CUR + |> Lwt_result.map ignore let safe_close fd = Lwt.catch (fun () -> Lwt_unix.close fd) (fun _ -> Lwt.return_unit) @@ -151,7 +152,7 @@ let extract ?(filter = fun _ -> true) ~src dst = Tar.return (Error (`Exn exn)) end | _ -> - let* _off = Tar.seek (Int64.to_int hdr.Tar.Header.file_size) in + let* () = Tar.seek (Int64.to_int hdr.Tar.Header.file_size) in Tar.return (Ok ()) in fold f src () diff --git a/unix/tar_unix.ml b/unix/tar_unix.ml index 84bd756..b64dac0 100644 --- a/unix/tar_unix.ml +++ b/unix/tar_unix.ml @@ -40,6 +40,7 @@ let read_complete fd buf len = let seek fd n = safe (Unix.lseek fd n) Unix.SEEK_CUR + |> Result.map ignore type decode_error = [ | `Fatal of Tar.error @@ -70,8 +71,8 @@ module High : sig type t type 'a s = 'a - external inj : 'a s -> ('a, t) Tar.io = "%identity" - external prj : ('a, t) Tar.io -> 'a s = "%identity" + external inj : 'a s -> ('a, t) Tar.io = "%identity" + external prj : ('a, t) Tar.io -> 'a s = "%identity" end = struct type t type 'a s = 'a @@ -153,11 +154,11 @@ let extract ?(filter = fun _ -> true) ~src dst = | _ -> (* TODO handle directories, links, etc. *) let ( let* ) = Tar.( let* ) in - let* _off = Tar.seek (Int64.to_int hdr.Tar.Header.file_size) in + let* () = Tar.seek (Int64.to_int hdr.Tar.Header.file_size) in Tar.return (Ok ()) else let ( let* ) = Tar.( let* ) in - let* _off = Tar.seek (Int64.to_int hdr.Tar.Header.file_size) in + let* () = Tar.seek (Int64.to_int hdr.Tar.Header.file_size) in Tar.return (Ok ()) in fold f src () From 576dcfffc777a61814f6852be5565777fae317b6 Mon Sep 17 00:00:00 2001 From: Robur Date: Thu, 9 May 2024 07:58:56 +0000 Subject: [PATCH 24/34] Remove [`Msg of string] from Tar_unix.decode_error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Calascibetta Romain Co-authored-by: Reynir Björnsson --- unix/tar_unix.mli | 1 - 1 file changed, 1 deletion(-) diff --git a/unix/tar_unix.mli b/unix/tar_unix.mli index 509ca9b..eda3e75 100644 --- a/unix/tar_unix.mli +++ b/unix/tar_unix.mli @@ -20,7 +20,6 @@ type decode_error = [ | `Fatal of Tar.error | `Unix of Unix.error * string * string | `Unexpected_end_of_file - | `Msg of string ] type t From b1c10d015d6fa5d8de982a06c018365211f6021a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reynir=20Bj=C3=B6rnsson?= Date: Thu, 9 May 2024 10:01:20 +0200 Subject: [PATCH 25/34] Document Tar.fold --- lib/tar.mli | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/tar.mli b/lib/tar.mli index 17cf93f..49fb2a9 100644 --- a/lib/tar.mli +++ b/lib/tar.mli @@ -206,3 +206,6 @@ val return : ('a, 'err) result -> ('a, 'err, _) t type ('a, 'err, 't) fold = (?global:Header.Extended.t -> Header.t -> 'a -> ('a, 'err, 't) t) -> 'a -> ('a, 'err, 't) t val fold : ('a, [> `Fatal of error ], 't) fold +(** [fold f] is a [_ t] that reads an archive and executes [f] on each header. + [f] is expected to either read or skip the file contents, or return an + error. *) From 890c1fe0936871dbb03d345cc69e693de88743d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reynir=20Bj=C3=B6rnsson?= Date: Thu, 9 May 2024 10:06:40 +0200 Subject: [PATCH 26/34] Fixups --- eio/tar_eio.ml | 2 +- unix/tar_unix.ml | 1 - unix/tar_unix.mli | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/eio/tar_eio.ml b/eio/tar_eio.ml index 60d6c68..597155d 100644 --- a/eio/tar_eio.ml +++ b/eio/tar_eio.ml @@ -56,7 +56,7 @@ let run t f = let buffer_size = 32768 in let buffer = Cstruct.create buffer_size in let rec loop (n: int) = - if n <= 0 then Ok (-1) (* XXX: I dunno... *) + if n <= 0 then Ok () else let amount = min n buffer_size in let block = Cstruct.sub buffer 0 amount in diff --git a/unix/tar_unix.ml b/unix/tar_unix.ml index b64dac0..2f4c0cd 100644 --- a/unix/tar_unix.ml +++ b/unix/tar_unix.ml @@ -46,7 +46,6 @@ type decode_error = [ | `Fatal of Tar.error | `Unix of Unix.error * string * string | `Unexpected_end_of_file - | `Msg of string ] let pp_decode_error ppf = function diff --git a/unix/tar_unix.mli b/unix/tar_unix.mli index eda3e75..122d532 100644 --- a/unix/tar_unix.mli +++ b/unix/tar_unix.mli @@ -44,7 +44,7 @@ val fold : val extract : ?filter:(Tar.Header.t -> bool) -> src:string -> string -> - (unit, [> `Exn of exn | decode_error ]) result + (unit, [> `Exn of exn | `Msg of string | decode_error ]) result (** [create ~level ~filter ~src dst] creates a tar archive at [dst]. It uses [src], a directory name, as input. If [filter] is provided From 6f4a26b78386007a01c65be3668c6b09137b1b92 Mon Sep 17 00:00:00 2001 From: Calascibetta Romain Date: Wed, 15 May 2024 14:46:31 +0200 Subject: [PATCH 27/34] Add a way to produce a *.tar.gz archive from the new pure API --- bin/otar.ml | 115 ++++++------- lib/tar.ml | 42 ++++- lib/tar.mli | 15 ++ lib/tar_gz.ml | 208 ++++++++++------------- lib/tar_gz.mli | 74 ++------ lib_test/global_extended_headers_test.ml | 2 +- lib_test/parse_test.ml | 10 +- unix/tar_lwt_unix.ml | 3 + unix/tar_unix.ml | 8 +- unix/tar_unix.mli | 13 +- 10 files changed, 234 insertions(+), 256 deletions(-) diff --git a/bin/otar.ml b/bin/otar.ml index d9a0395..5438a48 100644 --- a/bin/otar.ml +++ b/bin/otar.ml @@ -18,60 +18,64 @@ let () = Printexc.record_backtrace true let ( / ) = Filename.concat -let stream_of_fd fd = - let buf = Bytes.create 0x1000 in - fun () -> match Unix.read fd buf 0 (Bytes.length buf) with - | 0 -> None - | len -> - let str = Bytes.sub_string buf 0 len in - Some str - | exception End_of_file -> None +let contents_of_path path = + let fd = ref `None in + let buf = Bytes.create 0x100 in + let rec dispenser () = match !fd with + | `Closed -> Tar.return (Ok None) + | `None -> + let fd' = Unix.openfile path Unix.[ O_RDONLY; O_CLOEXEC ] 0o644 in + fd := `Active fd'; + dispenser () + | `Active fd' -> + match Unix.read fd' buf 0 (Bytes.length buf) with + | 0 | exception End_of_file -> + Unix.close fd'; fd := `Closed; Tar.return (Ok None) + | len -> + let str = Bytes.sub_string buf 0 len in + Tar.return (Ok (Some str)) in + dispenser -let always x = fun _ -> x - -(* -let create_tarball directory oc = +let create_tarball directory fd = let files = Sys.readdir directory in let os = match Sys.os_type with | "Win32" -> Gz.NTFS (* XXX(dinosaure): true? *) | "Unix" | "Cygwin" | _ -> Gz.Unix in let mtime = Unix.gettimeofday () in - let out_channel = Tar_gz.of_out_channel ~level:4 ~mtime:(Int32.of_float mtime) os oc in let hdr = Tar.Header.make ~file_mode:0o755 - ~mod_time:(Int64.of_float mtime) (Filename.concat directory "") 0L in - (match Tar_gz.write_block ~level:Tar.Header.Ustar hdr out_channel (always None) with - | Ok () -> () - | Error `Msg msg -> Format.eprintf "Error %s writing block\n%!" msg); - Array.iter begin fun filename -> - let fd = Unix.openfile (directory / filename) Unix.[ O_RDONLY; O_CLOEXEC ] 0o644 in - let stat = Unix.LargeFile.lstat (directory / filename) in - match stat.st_kind with - | Unix.S_REG -> - let stream = stream_of_fd fd in - let file_mode = if stat.Unix.LargeFile.st_perm land 0o111 <> 0 then 0o755 else 0o644 in - let mod_time = Int64.of_float stat.Unix.LargeFile.st_mtime in - let user_id = stat.Unix.LargeFile.st_uid in - let group_id = stat.Unix.LargeFile.st_gid in - let hdr = Tar.Header.make + ~mod_time:(Int64.of_float mtime) (Filename.concat directory "") 0L in + let entries = Array.fold_left begin fun acc filename -> + let stat = Unix.LargeFile.stat (directory / filename) in + match stat.st_kind with + | Unix.S_REG -> + let file_mode = if stat.st_perm land 0o111 <> 0 then 0o755 else 0o644 in + let mod_time = Int64.of_float stat.Unix.LargeFile.st_mtime in + let user_id = stat.st_uid in + let group_id = stat.st_gid in + let level = Some Tar.Header.Ustar in + let hdr = Tar.Header.make ~file_mode ~mod_time ~user_id ~group_id - (directory / filename) stat.Unix.LargeFile.st_size in - (match Tar_gz.write_block ~level:Tar.Header.Ustar hdr out_channel stream with - | Ok () -> () - | Error `Msg msg -> Format.eprintf "Error %s writing block\n%!" msg); - Unix.close fd ; - | _ -> - Format.eprintf "Skipping non-regular file %s\n" (Filename.concat directory filename) - end files ; - Tar_gz.write_end out_channel + (directory / filename) stat.st_size in + (level, hdr, contents_of_path (directory / filename)) :: acc + | _ -> acc end [] files in + let entries = List.to_seq entries in + let entries = Seq.to_dispenser entries in + let entries () = Tar.return (Ok (entries ())) in + let t = Tar.out ~level:Tar.Header.Ustar hdr entries in + let t = Tar_gz.out_gzipped ~level:4 ~mtime:(Int32.of_float mtime) os t in + match Tar_unix.run t fd with + | Ok () -> () + | Error err -> + Format.eprintf "%s: %a\n%!" Sys.executable_name Tar_unix.pp_error err let make directory oc = - let oc, oc_close, _gz = match oc with - | None -> stdout, ignore, false + let fd, fd_close = match oc with + | None -> Unix.stdout, ignore | Some filename -> - let oc = open_out filename in - oc, (fun () -> close_out oc), Filename.extension filename = ".gz" in - create_tarball directory oc ; oc_close () - *) + let fd = Unix.openfile filename Unix.[ O_TRUNC; O_CREAT; O_WRONLY; O_CLOEXEC ] 0o644 in + fd, (fun () -> Unix.close fd) in + Fun.protect ~finally:fd_close @@ fun () -> + create_tarball directory fd let sizes = [| "B"; "KiB"; "MiB"; "GiB"; "TiB"; "PiB"; "EiB"; "ZiB"; "YiB" |] @@ -89,46 +93,31 @@ let list filename = hdr.Tar.Header.file_name (Tar.Header.Link.to_string hdr.link_indicator) (bytes_to_size ~decimals:2) hdr.Tar.Header.file_size ; - (* - (* Alternatively: - let padding = Tar.Header.compute_zero_padding_length hdr in - let data = Int64.to_int hdr.Tar.Header.file_size in - let to_skip = data + padding in *) - Tar_gz.skip ic to_skip ; - go global () - | Error `Eof -> () - | Error `Fatal e -> - Format.eprintf "Error listing archive: %a\n%!" Tar.pp_error e; - exit 2 - *) let open Tar in - let to_skip = Header.(Int64.to_int (to_sectors hdr) * length) in - let* _ = seek to_skip in + let* _ = seek (Int64.to_int hdr.Tar.Header.file_size) in return (Ok ()) in let fd = Unix.openfile filename [ Unix.O_RDONLY ] 0 in - match Tar_unix.run (Tar_gz.gzipped (Tar.fold go ())) fd with + match Tar_unix.run (Tar_gz.in_gzipped (Tar.fold go ())) fd with | Ok () -> () | Error (`Unix _) -> Format.eprintf "Some UNIX error occurred.\n%!" | Error (`Msg e) -> Format.eprintf "Some error: %s.\n%!" e - | Error `Unexpected_end_of_file -> + | Error (`Unexpected_end_of_file | `Eof) -> Format.eprintf "Unexpected end of file.\n%!" - | Error `Eof | Error `Gz _ -> - Format.eprintf "Some fatal error occurred.\n%!" + | Error `Gz err -> + Format.eprintf "Some Gzip error occurred: %s.\n%!" err | Error (`Fatal _) -> Format.eprintf "Some fatal error occurred.\n%!" let () = match Sys.argv with | [| _; "list"; filename; |] when Sys.file_exists filename -> list filename - (* | [| _; directory |] when Sys.is_directory directory -> make directory None | [| _; directory; output |] when Sys.is_directory directory -> make directory (Some output) - *) | _ -> let cmd = Filename.basename Sys.argv.(0) in Format.eprintf "%s []\n%s list \n" cmd cmd diff --git a/lib/tar.ml b/lib/tar.ml index 2fa3875..819b60e 100644 --- a/lib/tar.ml +++ b/lib/tar.ml @@ -795,7 +795,7 @@ let encode_unextended_header ?level header = let encode_extended_header ?level scope hdr = let link_indicator, link_indicator_name = match scope with | `Per_file -> Header.Link.PerFileExtendedHeader, "paxheader" - | `Global ->Header.Link.GlobalExtendedHeader, "pax_global_header" + | `Global -> Header.Link.GlobalExtendedHeader, "pax_global_header" | _ -> assert false in let pax_payload = Header.Extended.marshal hdr in @@ -825,12 +825,14 @@ type ('a, 'err, 't) t = | Bind : ('a, 'err, 't) t * ('a -> ('b, 'err, 't) t) -> ('b, 'err, 't) t | Return : ('a, 'err) result -> ('a, 'err, 't) t | High : (('a, 'err) result, 't) io -> ('a, 'err, 't) t + | Write : string -> (unit, 'err, 't) t let ( let* ) x f = Bind (x, f) let return x = Return x let really_read n = Really_read n let read n = Read n let seek n = Seek n +let write str = Write str type ('a, 'err, 't) fold = (?global:Header.Extended.t -> Header.t -> 'a -> ('a, 'err, 't) t) -> 'a -> ('a, 'err, 't) t @@ -859,3 +861,41 @@ let fold f init = | Error `Eof -> return (Ok acc) | Error `Fatal _ as e -> return e in go (decode_state ()) init + +let rec writev = function + | [] -> return (Ok ()) + | x :: r -> + let* () = write x in + writev r + +let rec pipe stream = + let* block = stream () in + match block with + | Some str -> let* () = writev [ str ] in pipe stream + | None -> return (Ok ()) + +type ('err, 't) content = unit -> (string option, 'err, 't) t +type ('err, 't) entry = Header.compatibility option * Header.t * ('err, 't) content +type ('err, 't) entries = unit -> (('err, 't) entry option, 'err, 't) t + +let out ?level hdr entries = + let rec go () = + let* entry = entries () in + match entry with + | None -> + let* () = writev [ Header.zero_block; Header.zero_block ] in + return (Ok ()) + | Some (level, hdr, stream) -> + match encode_header ?level hdr with + | Ok sstr -> + let* () = writev sstr in + let* () = pipe stream in + let* () = writev [ Header.zero_padding hdr ] in + go () + | Error _ as err -> return err in + match encode_header ?level hdr with + | Error _ as err -> return err + | Ok sstr -> + let* () = writev sstr in + let* () = writev [ Header.zero_padding hdr ] in + go () diff --git a/lib/tar.mli b/lib/tar.mli index 49fb2a9..da642ea 100644 --- a/lib/tar.mli +++ b/lib/tar.mli @@ -196,12 +196,14 @@ type ('a, 'err, 't) t = | Bind : ('a, 'err, 't) t * ('a -> ('b, 'err, 't) t) -> ('b, 'err, 't) t | Return : ('a, 'err) result -> ('a, 'err, 't) t | High : (('a, 'err) result, 't) io -> ('a, 'err, 't) t + | Write : string -> (unit, 'err, 't) t val really_read : int -> (string, _, _) t val read : int -> (string, _, _) t val seek : int -> (unit, _, _) t val ( let* ) : ('a, 'err, 't) t -> ('a -> ('b, 'err, 't) t) -> ('b, 'err, 't) t val return : ('a, 'err) result -> ('a, 'err, _) t +val write : string -> (unit, _, _) t type ('a, 'err, 't) fold = (?global:Header.Extended.t -> Header.t -> 'a -> ('a, 'err, 't) t) -> 'a -> ('a, 'err, 't) t @@ -209,3 +211,16 @@ val fold : ('a, [> `Fatal of error ], 't) fold (** [fold f] is a [_ t] that reads an archive and executes [f] on each header. [f] is expected to either read or skip the file contents, or return an error. *) + +type ('err, 't) content = unit -> (string option, 'err, 't) t +type ('err, 't) entry = Header.compatibility option * Header.t * ('err, 't) content +type ('err, 't) entries = unit -> (('err, 't) entry option, 'err, 't) t + +val out : + ?level:Header.compatibility + -> Header.t + -> ([> `Msg of string ] as 'err, 't) entries + -> (unit, 'err, 't) t +(** [out hdr entries] is a [_ t] that writes [entries] into an archive. [hdr] is + the global header and each entry must come from a {!type:content} stream and + the associated header.*) diff --git a/lib/tar_gz.ml b/lib/tar_gz.ml index f3ce0c8..56debe1 100644 --- a/lib/tar_gz.ml +++ b/lib/tar_gz.ml @@ -17,27 +17,6 @@ external ba_get_int32_ne : De.bigstring -> int -> int32 = "%caml_bigstring_get32" external ba_set_int32_ne : De.bigstring -> int -> int32 -> unit = "%caml_bigstring_set32" -(* -let bigstring_to_string ?(off= 0) ?len ba = - let len = match len with - | Some len -> len - | None -> De.bigstring_length ba - off in - let res = Bytes.create len in - let len0 = len land 3 in - let len1 = len asr 2 in - for i = 0 to len1 - 1 do - let i = i * 4 in - let v = ba_get_int32_ne ba i in - Bytes.set_int32_ne res i v - done; - for i = 0 to len0 - 1 do - let i = (len1 * 4) + i in - let v = Bigarray.Array1.get ba i in - Bytes.set res i v - done; - Bytes.unsafe_to_string res -*) - let bigstring_blit_string src ~src_off dst ~dst_off ~len = let len0 = len land 3 in let len1 = len asr 2 in @@ -121,13 +100,17 @@ let really_read_through_gz decoder len = type error = [ `Fatal of Tar.error | `Eof | `Gz of string ] -let seek_through_gz : decoder -> int -> (unit, [> error ], _) Tar.t = fun state len -> +let seek_through_gz + : decoder -> int -> (unit, [> error ], _) Tar.t + = fun state len -> let open Tar in let* _buf = really_read_through_gz state len in Tar.return (Ok ()) -let gzipped t = - let rec go : type a. decoder -> (a, [> error ] as 'err, 't) Tar.t -> (a, 'err, 't) Tar.t = fun decoder -> function +let in_gzipped t = + let rec go + : type a. decoder -> (a, [> error ] as 'err, 't) Tar.t -> (a, 'err, 't) Tar.t + = fun decoder -> function | Tar.Really_read len -> really_read_through_gz decoder len | Tar.Read _len -> assert false (* XXX(dinosaure): actually does not emit [Tar.Read]. *) @@ -135,7 +118,8 @@ let gzipped t = | Tar.Return _ as ret -> ret | Tar.Bind (x, f) -> Tar.Bind (go decoder x, (fun x -> go decoder (f x))) - | Tar.High _ as high -> high in + | Tar.High _ as high -> high + | Tar.Write _ -> assert false in let decoder = let oc_buffer = De.bigstring_create 0x1000 in { gz= Gz.Inf.decoder `Manual ~o:oc_buffer @@ -145,95 +129,87 @@ let gzipped t = ; pos= 0 } in go decoder t -(* -module Make - (Async : Tar.ASYNC) - (Writer : Tar.WRITER with type 'a io = 'a Async.t) - (Reader : READER with type 'a io = 'a Async.t) -= struct - open Async - - module Gz_writer = struct - type out_channel = - { mutable gz : Gz.Def.encoder - ; ic_buffer : De.bigstring - ; oc_buffer : De.bigstring - ; out_channel : Writer.out_channel } - - type 'a io = 'a Async.t - - let really_write ({ gz; ic_buffer; oc_buffer; out_channel; _ } as state) str = - let rec until_await gz = - match Gz.Def.encode gz with - | `Await gz -> Async.return gz - | `Flush gz -> - let len = De.bigstring_length oc_buffer - Gz.Def.dst_rem gz in - let str = bigstring_to_string oc_buffer ~off:0 ~len in - Writer.really_write out_channel str >>= fun () -> - until_await (Gz.Def.dst gz oc_buffer 0 (De.bigstring_length oc_buffer)) - | `End _gz -> assert false - and go gz (str, str_off, str_len) = - if str_len = 0 - then ( state.gz <- gz ; Async.return () ) - else ( let len = min str_len (De.bigstring_length ic_buffer) in - bigstring_blit_string str ~src_off:0 ic_buffer ~dst_off:0 ~len; - let gz = Gz.Def.src gz ic_buffer 0 len in - until_await gz >>= fun gz -> - go gz (str, str_off + len, str_len - len) ) in - go gz (str, 0, String.length str) - end - - type out_channel = Gz_writer.out_channel - - let of_out_channel ?bits:(w_bits= 15) ?q:(q_len= 0x1000) ~level ~mtime os out_channel = - let ic_buffer = De.bigstring_create (4 * 4 * 1024) in - let oc_buffer = De.bigstring_create 4096 in - let gz = - let w = De.Lz77.make_window ~bits:w_bits in - let q = De.Queue.create q_len in - Gz.Def.encoder `Manual `Manual ~mtime os ~q ~w ~level in - let gz = Gz.Def.dst gz oc_buffer 0 (De.bigstring_length oc_buffer) in - { Gz_writer.gz; ic_buffer; oc_buffer; out_channel; } - - let write_block ?level hdr ({ Gz_writer.ic_buffer= buf; oc_buffer; out_channel; _ } as state) block = - HeaderWriter.write ?level hdr state >>= function - | Error _ as e -> return e - | Ok () -> - (* XXX(dinosaure): we can refactor this codec with [Gz_writer.really_write] - but this loop saves and uses [ic_buffer]/[buf] to avoid extra - allocations on the case between [string] and [bigstring]. *) - let rec deflate (str, off, len) gz = match Gz.Def.encode gz with - | `Await gz -> - if len = 0 - then block () >>= function - | None -> state.gz <- gz ; Async.return () - | Some str -> deflate (str, 0, String.length str) gz - else ( let len' = min len (De.bigstring_length buf) in - bigstring_blit_string str ~src_off:off buf ~dst_off:0 ~len:len'; - deflate (str, off + len', len - len') - (Gz.Def.src gz buf 0 len') ) - | `Flush gz -> - let len = De.bigstring_length oc_buffer - Gz.Def.dst_rem gz in - let out = bigstring_to_string oc_buffer ~len in - Writer.really_write out_channel out >>= fun () -> - deflate (str, off, len) (Gz.Def.dst gz oc_buffer 0 (De.bigstring_length oc_buffer)) - | `End _gz -> assert false in - deflate ("", 0, 0) state.gz >>= fun () -> - Gz_writer.really_write state (Tar.Header.zero_padding hdr) >>= fun () -> - return (Ok ()) +type encoder = + { mutable state : [ `Await of Gz.Def.encoder ] + ; ic_buffer : De.bigstring + ; oc_buffer : De.bigstring } + +let ( let* ) x f = Tar.Bind (x, f) + +let rec until_await oc_pos oc_buffer = function + | `Flush gz as state -> + let max = De.bigstring_length oc_buffer - Gz.Def.dst_rem gz - oc_pos in + let len = min 0x100 max in + let res = Bytes.create len in + bigstring_blit_bytes oc_buffer ~src_off:0 res ~dst_off:0 ~len; + let* () = Tar.write (Bytes.unsafe_to_string res) in + if len > 0 then until_await (oc_pos + len) oc_buffer state + else + Gz.Def.dst gz oc_buffer 0 (De.bigstring_length oc_buffer) + |> Gz.Def.encode + |> until_await 0 oc_buffer + | `Await gz -> Tar.return (Ok (`Await gz)) + | `End _ -> assert false + +let rec until_end oc_pos oc_buffer = function + | `Await _ -> assert false + | (`Flush gz | `End gz) as state -> + let max = De.bigstring_length oc_buffer - Gz.Def.dst_rem gz - oc_pos in + let len = min 0x100 max in + let res = Bytes.create len in + bigstring_blit_bytes oc_buffer ~src_off:0 res ~dst_off:0 ~len; + let* () = Tar.write (Bytes.unsafe_to_string res) in + if len > 0 then until_end (oc_pos + len) oc_buffer state + else match state with + | `End _ -> Tar.return (Ok ()) + | `Flush gz -> + Gz.Def.dst gz oc_buffer 0 (De.bigstring_length oc_buffer) + |> Gz.Def.encode + |> until_end 0 oc_buffer + +let write_gz ({ state; ic_buffer; oc_buffer; } as encoder) str = + let rec go (str, str_off, str_len) state = + if str_len = 0 + then Tar.return (Ok state) + else begin + let len = min str_len (De.bigstring_length ic_buffer) in + bigstring_blit_string str ~src_off:str_off ic_buffer ~dst_off:0 ~len; + let `Await gz = state in + let gz = Gz.Def.src gz ic_buffer 0 len in + let* state = until_await 0 oc_buffer (Gz.Def.encode gz) in + go (str, str_off + len, str_len - len) state + end in + let* state = go (str, 0, String.length str) state in + encoder.state <- state; + Tar.return (Ok ()) - let write_end ({ Gz_writer.oc_buffer; out_channel; _ } as state) = - Gz_writer.really_write state Tar.Header.zero_block >>= fun () -> - Gz_writer.really_write state Tar.Header.zero_block >>= fun () -> - let rec until_end gz = match Gz.Def.encode gz with - | `Await _gz -> assert false - | `Flush gz | `End gz as flush_or_end -> - let max = De.bigstring_length oc_buffer - Gz.Def.dst_rem gz in - Writer.really_write out_channel (bigstring_to_string oc_buffer ~len:max) >>= fun () -> - match flush_or_end with - | `Flush gz -> - until_end (Gz.Def.dst gz oc_buffer 0 (De.bigstring_length oc_buffer)) - | `End _gz -> Async.return () in - until_end (Gz.Def.src state.gz De.bigstring_empty 0 0) -end -*) +let out_gzipped ~level ~mtime os t = + let rec go + : type a. encoder -> (a, 'err, 't) Tar.t -> (a, 'err, 't) Tar.t + = fun encoder -> function + | Tar.Really_read _ as ret -> ret + | Tar.Read _ as ret -> ret + | Tar.Seek _ as ret -> ret + | Tar.Return _ as ret -> ret + | Tar.Bind (x, f) -> + Tar.Bind (go encoder x, (fun x -> go encoder (f x))) + | Tar.High _ as high -> high + | Tar.Write str -> write_gz encoder str in + let ic_buffer = De.bigstring_create 0x1000 in + let oc_buffer = De.bigstring_create 0x1000 in + let q = De.Queue.create 4096 in + let w = De.Lz77.make_window ~bits:15 in + let gz = Gz.Def.encoder `Manual `Manual ~q ~w ~level ~mtime os in + let gz = Gz.Def.dst gz oc_buffer 0 (De.bigstring_length oc_buffer) in + let* state = until_await 0 oc_buffer (Gz.Def.encode gz) in + let encoder = + { state + ; ic_buffer + ; oc_buffer } in + let* result = go encoder t in + let `Await gz = encoder.state in + let* () = + Gz.Def.src gz ic_buffer 0 0 + |> Gz.Def.encode + |> until_end 0 oc_buffer in + Tar.return (Ok result) diff --git a/lib/tar_gz.mli b/lib/tar_gz.mli index 846b2c3..f7dd4ae 100644 --- a/lib/tar_gz.mli +++ b/lib/tar_gz.mli @@ -16,65 +16,15 @@ type error = [ `Fatal of Tar.error | `Eof | `Gz of string ] -val gzipped : ('a, ([> error ] as 'err), 't) Tar.t -> ('a, 'err, 't) Tar.t - -(* -module type READER = sig - type in_channel - type 'a io - val read : in_channel -> bytes -> int io -end - -module Make - (Async : Tar.ASYNC) - (Writer : Tar.WRITER with type 'a io = 'a Async.t) - (Reader : READER with type 'a io = 'a Async.t) -: sig - type in_channel - - val of_in_channel : internal:De.bigstring -> Reader.in_channel -> in_channel - - val really_read : in_channel -> bytes -> unit Async.t - (** [really_read fd buf] fills [buf] with data from [fd] or raises - {!Stdlib.End_of_file}. *) - - val skip : in_channel -> int -> unit Async.t - - type out_channel - - val of_out_channel : ?bits:int -> ?q:int -> level:int -> - mtime:int32 -> Gz.os -> Writer.out_channel -> out_channel - - val write_block : ?level:Tar.Header.compatibility -> Tar.Header.t -> - out_channel -> (unit -> string option Async.t) -> (unit, [> `Msg of string ]) result Async.t - (** [write_block hdr oc stream] writes [hdr], then {i deflate} the given - [stream], then zero-pads so the stream is positionned for the next - block. - - A simple usage to write a file: - {[ - let stream_of_fd fd = - let buf = Bytes.create 0x1000 in - fun () -> match Unix.read fd buf 0 (Bytes.length buf) with - | 0 -> None - | len -> Some (Bytes.sub_string buf 0 len) - | exception End_of_file -> None - - let add_file oc filename = - let fd = Unix.openfile filename Unix.[ O_RDONLY ] 0o644 in - let hdr = Tar.Header.make ... in - (match write_block hdr oc (stream_of_fd fd) with - | Ok () -> () - | Error `Msg msg -> print_endline ("error: " ^ msg)); - Unix.close fd - ]} *) - - val write_end : out_channel -> unit Async.t - (** [write_end oc] writes a stream terminator to [oc]. *) - - module HeaderReader : - Tar.HEADERREADER with type in_channel = in_channel and type 'a io = 'a Async.t - module HeaderWriter : - Tar.HEADERWRITER with type out_channel = out_channel and type 'a io = 'a Async.t -end -*) +val in_gzipped : ('a, ([> error ] as 'err), 't) Tar.t -> ('a, 'err, 't) Tar.t + (** [in_gzipped] takes a {i tar process} (like {!val:Tar.fold}) and add a + uncompression layer to be able to manipulate a [*.tar.gz] archive. *) + +val out_gzipped : + level:int + -> mtime:int32 + -> Gz.os + -> ('a, 'err, 't) Tar.t + -> ('a, 'err, 't) Tar.t +(** [out_gzipped] takes a {i tar process} (like {!val:Tar.out}) and add a + compression layer to be able to generate a [*.tar.gz] archive. *) diff --git a/lib_test/global_extended_headers_test.ml b/lib_test/global_extended_headers_test.ml index ff93383..a4ef1ab 100644 --- a/lib_test/global_extended_headers_test.ml +++ b/lib_test/global_extended_headers_test.ml @@ -71,7 +71,7 @@ let use_global_extended_headers _test_ctxt = match Tar_unix.fold f "test.tar" 0 with | Ok 4 -> () | Ok n -> Alcotest.failf "early abort, expected 4, received %u" n - | Error e -> Alcotest.failf "failed to read: %a" Tar_unix.pp_decode_error e + | Error e -> Alcotest.failf "failed to read: %a" Tar_unix.pp_error e let () = let suite = "tar - pax global extended headers", [ diff --git a/lib_test/parse_test.ml b/lib_test/parse_test.ml index d0a6469..30f303b 100644 --- a/lib_test/parse_test.ml +++ b/lib_test/parse_test.ml @@ -40,7 +40,7 @@ let list filename = in match Tar_unix.fold f filename [] with | Ok acc -> List.rev acc - | Error e -> Alcotest.failf "unexpected error: %a" Tar_unix.pp_decode_error e + | Error e -> Alcotest.failf "unexpected error: %a" Tar_unix.pp_error e let pp_header f x = Fmt.pf f "%s" (Tar.Header.to_detailed_string x) let header = Alcotest.testable pp_header ( = ) @@ -175,7 +175,7 @@ let can_list_pax_implicit_dir () = in match Tar_unix.fold f "lib_test/pax-shenanigans.tar" () with | Ok () -> () - | Error e -> Alcotest.failf "unexpected error: %a" Tar_unix.pp_decode_error e + | Error e -> Alcotest.failf "unexpected error: %a" Tar_unix.pp_error e (* Sample tar generated with commit 1583f71ea33b2836d3fb996ac7dc35d55abe2777: [let buf = @@ -198,7 +198,7 @@ let can_list_longlink_implicit_dir () = in match Tar_unix.fold f "lib_test/long-implicit-dir.tar" () with | Ok () -> () - | Error e -> Alcotest.failf "unexpected error: %a" Tar_unix.pp_decode_error e + | Error e -> Alcotest.failf "unexpected error: %a" Tar_unix.pp_error e let starts_with ~prefix s = let len_s = String.length s @@ -229,7 +229,7 @@ let can_transform_tar () = | Error _ -> Alcotest.fail "error writing header" in match Tar_unix.fold f tar_in () with - | Error e -> Alcotest.failf "error folding %a" Tar_unix.pp_decode_error e + | Error e -> Alcotest.failf "error folding %a" Tar_unix.pp_error e | Ok () -> match Tar_unix.write_end fd_out with | Error _ -> Alcotest.fail "couldn't write end" @@ -243,7 +243,7 @@ let can_transform_tar () = Tar.return (Ok ()) in match Tar_unix.fold f tar_out () with - | Error e -> Alcotest.failf "error folding2 %a" Tar_unix.pp_decode_error e + | Error e -> Alcotest.failf "error folding2 %a" Tar_unix.pp_error e | Ok () -> () module Block4096 = struct diff --git a/unix/tar_lwt_unix.ml b/unix/tar_lwt_unix.ml index 7de8215..cb7bcb5 100644 --- a/unix/tar_lwt_unix.ml +++ b/unix/tar_lwt_unix.ml @@ -82,6 +82,9 @@ let value v = Tar.High (High.inj v) let run t fd = let open Lwt_result.Infix in let rec run : type a. (a, [> decode_error ] as 'err, t) Tar.t -> (a, 'err) result Lwt.t = function + | Tar.Write str -> + safe (Lwt_unix.write_string fd str 0) (String.length str) >>= fun _write -> + Lwt_result.return () | Tar.Read len -> let b = Bytes.make len '\000' in safe (Lwt_unix.read fd b 0) len >>= fun read -> diff --git a/unix/tar_unix.ml b/unix/tar_unix.ml index 2f4c0cd..761f3b9 100644 --- a/unix/tar_unix.ml +++ b/unix/tar_unix.ml @@ -42,13 +42,14 @@ let seek fd n = safe (Unix.lseek fd n) Unix.SEEK_CUR |> Result.map ignore -type decode_error = [ +type error = [ | `Fatal of Tar.error | `Unix of Unix.error * string * string | `Unexpected_end_of_file + | `Msg of string ] -let pp_decode_error ppf = function +let pp_error ppf = function | `Fatal err -> Tar.pp_error ppf err | `Unix (err, fname, arg) -> Format.fprintf ppf "Unix error %s (function %s, arg %s)" @@ -86,6 +87,9 @@ let value v = Tar.High (High.inj v) let run t fd = let rec run : type a. (a, _ as 'err, t) Tar.t -> (a, 'err) result = function + | Tar.Write str -> + let* _write = safe (Unix.write_substring fd str 0) (String.length str) in + Ok () | Tar.Read len -> let b = Bytes.make len '\000' in let* read = safe (Unix.read fd b 0) len in diff --git a/unix/tar_unix.mli b/unix/tar_unix.mli index 122d532..77f1b7c 100644 --- a/unix/tar_unix.mli +++ b/unix/tar_unix.mli @@ -16,17 +16,18 @@ (** Unix I/O for tar-formatted data. *) -type decode_error = [ +type error = [ | `Fatal of Tar.error | `Unix of Unix.error * string * string | `Unexpected_end_of_file + | `Msg of string ] type t -val pp_decode_error : Format.formatter -> decode_error -> unit +val pp_error : Format.formatter -> error -> unit -val run : ('a, [> decode_error ] as 'b, t) Tar.t -> Unix.file_descr -> ('a, 'b) result +val run : ('a, [> error ] as 'b, t) Tar.t -> Unix.file_descr -> ('a, 'b) result val value : ('a, 'err) result -> ('a, 'err, t) Tar.t (** [fold f filename acc] folds over the tar archive. The function [f] is called @@ -34,8 +35,8 @@ val value : ('a, 'err) result -> ('a, 'err, t) Tar.t descriptor by [hdr.Tar.Header.file_size]. *) val fold : (?global:Tar.Header.Extended.t -> Tar.Header.t -> 'a -> - ('a, decode_error, t) Tar.t) -> - string -> 'a -> ('a, decode_error) result + ('a, error, t) Tar.t) -> + string -> 'a -> ('a, error) result (** [extract ~filter ~src dst] extracts the tar archive [src] into the directory [dst]. If [dst] does not exist, it is created. If [filter] is @@ -44,7 +45,7 @@ val fold : val extract : ?filter:(Tar.Header.t -> bool) -> src:string -> string -> - (unit, [> `Exn of exn | `Msg of string | decode_error ]) result + (unit, [> `Exn of exn | error ]) result (** [create ~level ~filter ~src dst] creates a tar archive at [dst]. It uses [src], a directory name, as input. If [filter] is provided From 9a4ee141aa48869c3c6392e002927df8f29a82d6 Mon Sep 17 00:00:00 2001 From: Calascibetta Romain Date: Wed, 15 May 2024 18:30:24 +0200 Subject: [PATCH 28/34] Apply suggestions from @reynir MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Reynir Björnsson --- lib/tar.ml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/tar.ml b/lib/tar.ml index 819b60e..5bf0467 100644 --- a/lib/tar.ml +++ b/lib/tar.ml @@ -871,7 +871,7 @@ let rec writev = function let rec pipe stream = let* block = stream () in match block with - | Some str -> let* () = writev [ str ] in pipe stream + | Some str -> let* () = write str in pipe stream | None -> return (Ok ()) type ('err, 't) content = unit -> (string option, 'err, 't) t @@ -890,12 +890,12 @@ let out ?level hdr entries = | Ok sstr -> let* () = writev sstr in let* () = pipe stream in - let* () = writev [ Header.zero_padding hdr ] in + let* () = write (Header.zero_padding hdr)] in go () | Error _ as err -> return err in match encode_header ?level hdr with | Error _ as err -> return err | Ok sstr -> let* () = writev sstr in - let* () = writev [ Header.zero_padding hdr ] in + let* () = write (Header.zero_padding hdr) in go () From 9684d77f88ca80d81d995f653ab7aca904d01fb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reynir=20Bj=C3=B6rnsson?= Date: Wed, 24 Jul 2024 14:50:59 +0200 Subject: [PATCH 29/34] Fix Tar.out The global header is now an optional Tar.Header.t. In otar we then can't use the "global header" for the directory entry (it's not a proper global header but just a regular directory header) - so we add it to the list of entries. --- bin/otar.ml | 7 ++++--- lib/tar.ml | 20 +++++++++++--------- lib/tar.mli | 2 +- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/bin/otar.ml b/bin/otar.ml index 5438a48..46bd29c 100644 --- a/bin/otar.ml +++ b/bin/otar.ml @@ -42,8 +42,9 @@ let create_tarball directory fd = | "Win32" -> Gz.NTFS (* XXX(dinosaure): true? *) | "Unix" | "Cygwin" | _ -> Gz.Unix in let mtime = Unix.gettimeofday () in - let hdr = Tar.Header.make ~file_mode:0o755 + let dir_hdr = Tar.Header.make ~file_mode:0o755 ~mod_time:(Int64.of_float mtime) (Filename.concat directory "") 0L in + let dir_entry = (None, dir_hdr, (fun () -> Tar.return (Ok None))) in let entries = Array.fold_left begin fun acc filename -> let stat = Unix.LargeFile.stat (directory / filename) in match stat.st_kind with @@ -58,10 +59,10 @@ let create_tarball directory fd = (directory / filename) stat.st_size in (level, hdr, contents_of_path (directory / filename)) :: acc | _ -> acc end [] files in - let entries = List.to_seq entries in + let entries = List.to_seq (dir_entry :: entries) in let entries = Seq.to_dispenser entries in let entries () = Tar.return (Ok (entries ())) in - let t = Tar.out ~level:Tar.Header.Ustar hdr entries in + let t = Tar.out ~level:Tar.Header.Ustar entries in let t = Tar_gz.out_gzipped ~level:4 ~mtime:(Int32.of_float mtime) os t in match Tar_unix.run t fd with | Ok () -> () diff --git a/lib/tar.ml b/lib/tar.ml index 5bf0467..fe01e93 100644 --- a/lib/tar.ml +++ b/lib/tar.ml @@ -796,7 +796,6 @@ let encode_extended_header ?level scope hdr = let link_indicator, link_indicator_name = match scope with | `Per_file -> Header.Link.PerFileExtendedHeader, "paxheader" | `Global -> Header.Link.GlobalExtendedHeader, "pax_global_header" - | _ -> assert false in let pax_payload = Header.Extended.marshal hdr in let pax = @@ -878,7 +877,7 @@ type ('err, 't) content = unit -> (string option, 'err, 't) t type ('err, 't) entry = Header.compatibility option * Header.t * ('err, 't) content type ('err, 't) entries = unit -> (('err, 't) entry option, 'err, 't) t -let out ?level hdr entries = +let out ?level ?global_hdr entries = let rec go () = let* entry = entries () in match entry with @@ -890,12 +889,15 @@ let out ?level hdr entries = | Ok sstr -> let* () = writev sstr in let* () = pipe stream in - let* () = write (Header.zero_padding hdr)] in + let* () = write (Header.zero_padding hdr) in go () | Error _ as err -> return err in - match encode_header ?level hdr with - | Error _ as err -> return err - | Ok sstr -> - let* () = writev sstr in - let* () = write (Header.zero_padding hdr) in - go () + match global_hdr with + | None -> go () + | Some hdr -> + (* [encode_extended_header] includes padding *) + match encode_extended_header ?level `Global hdr with + | Error _ as err -> return err + | Ok sstr -> + let* () = writev sstr in + go () diff --git a/lib/tar.mli b/lib/tar.mli index da642ea..4a8af7e 100644 --- a/lib/tar.mli +++ b/lib/tar.mli @@ -218,7 +218,7 @@ type ('err, 't) entries = unit -> (('err, 't) entry option, 'err, 't) t val out : ?level:Header.compatibility - -> Header.t + -> ?global_hdr:Header.Extended.t -> ([> `Msg of string ] as 'err, 't) entries -> (unit, 'err, 't) t (** [out hdr entries] is a [_ t] that writes [entries] into an archive. [hdr] is From 69f530180df042273d76eb408586cdec73405643 Mon Sep 17 00:00:00 2001 From: Calascibetta Romain Date: Wed, 31 Jul 2024 11:43:26 +0200 Subject: [PATCH 30/34] Don't use Seq to keep compatibility with older version of OCaml --- bin/otar.ml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/bin/otar.ml b/bin/otar.ml index 46bd29c..59c3245 100644 --- a/bin/otar.ml +++ b/bin/otar.ml @@ -36,6 +36,12 @@ let contents_of_path path = Tar.return (Ok (Some str)) in dispenser +let to_stream lst = + let lst = ref lst in + fun () -> match !lst with + | [] -> None + | x :: r -> lst := r; Some x + let create_tarball directory fd = let files = Sys.readdir directory in let os = match Sys.os_type with @@ -59,8 +65,7 @@ let create_tarball directory fd = (directory / filename) stat.st_size in (level, hdr, contents_of_path (directory / filename)) :: acc | _ -> acc end [] files in - let entries = List.to_seq (dir_entry :: entries) in - let entries = Seq.to_dispenser entries in + let entries = to_stream (dir_entry :: entries) in let entries () = Tar.return (Ok (entries ())) in let t = Tar.out ~level:Tar.Header.Ustar entries in let t = Tar_gz.out_gzipped ~level:4 ~mtime:(Int32.of_float mtime) os t in From 89a5f4980bd2c35705dd01826d911b3a088f4873 Mon Sep 17 00:00:00 2001 From: Calascibetta Romain Date: Wed, 31 Jul 2024 11:50:46 +0200 Subject: [PATCH 31/34] Upgrade the eio package --- eio/tar_eio.ml | 5 +++-- eio/tar_eio.mli | 24 ++++++++++++------------ tar-eio.opam | 2 +- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/eio/tar_eio.ml b/eio/tar_eio.ml index 597155d..d0fae0b 100644 --- a/eio/tar_eio.ml +++ b/eio/tar_eio.ml @@ -36,8 +36,9 @@ type t = High.t let value v = Tar.High (High.inj v) -let run t f = +let run_read_only t f = let rec run : type a. (a, 'err, t) Tar.t -> (a, 'err) result = function + | Tar.Write _ -> assert false | Tar.Read len -> let b = Cstruct.create len in (match Flow.single_read f b with @@ -74,7 +75,7 @@ let run t f = let fold f filename init = (* XXX(reynir): ??? *) Eio.Path.with_open_in filename - (run (Tar.fold f init)) + (run_read_only (Tar.fold f init)) (* Eio needs a non-file-opening stat. *) let stat path = diff --git a/eio/tar_eio.mli b/eio/tar_eio.mli index 0b738b6..e2877fc 100644 --- a/eio/tar_eio.mli +++ b/eio/tar_eio.mli @@ -20,14 +20,14 @@ type t val value : ('a, 'err) result -> ('a, 'err, t) Tar.t -val run : ('a, [> `Unexpected_end_of_file] as 'b, t) Tar.t -> Eio.Flow.source -> ('a, 'b) result +val run_read_only : ('a, [> `Unexpected_end_of_file] as 'b, t) Tar.t -> [> `R ] Eio.Flow.source -> ('a, 'b) result val fold : (?global:Tar.Header.Extended.t -> Tar.Header.t -> 'a -> ('a, [> `Fatal of Tar.error | `Unexpected_end_of_file ] as 'b, t) Tar.t) -> - Eio.Fs.dir Eio.Path.t -> + Eio.Fs.dir_ty Eio.Path.t -> 'a -> ('a, 'b) result @@ -38,33 +38,33 @@ val header_of_file : ?level:Tar.Header.compatibility -> ?getpwuid:(int64 -> string) -> ?getgrgid:(int64 -> string) -> - Eio.Fs.dir Eio.Path.t -> + Eio.Fs.dir_ty Eio.Path.t -> Tar.Header.t val extract : ?filter:(Tar.Header.t -> bool) -> - src:Eio.Fs.dir Eio.Path.t -> - Eio.Fs.dir Eio.Path.t -> + src:Eio.Fs.dir_ty Eio.Path.t -> + Eio.Fs.dir_ty Eio.Path.t -> (unit, _) result val create : ?level:Tar.Header.compatibility -> ?global:Tar.Header.Extended.t -> ?filter:(Tar.Header.t -> bool) -> - src:Eio.Fs.dir Eio.Path.t -> - Eio.Fs.dir Eio.Path.t -> + src:Eio.Fs.dir_ty Eio.Path.t -> + Eio.Fs.dir_ty Eio.Path.t -> (unit, _) result val append_file : ?level:Tar.Header.compatibility -> ?header:Tar.Header.t -> - Eio.Fs.dir Eio.Path.t -> - Eio.Flow.sink -> + Eio.Fs.dir_ty Eio.Path.t -> + [> `W ] Eio.Flow.sink -> (unit, _) result val write_header : ?level:Tar.Header.compatibility -> - Tar.Header.t -> Eio.Flow.sink -> + Tar.Header.t -> [> `W ] Eio.Flow.sink -> (unit, _) result val write_global_extended_header : ?level:Tar.Header.compatibility -> - Tar.Header.Extended.t -> Eio.Flow.sink -> + Tar.Header.Extended.t -> [> `W ] Eio.Flow.sink -> (unit, _) result -val write_end : Eio.Flow.sink -> (unit, _) result +val write_end : [> `W ] Eio.Flow.sink -> (unit, _) result diff --git a/tar-eio.opam b/tar-eio.opam index ce4e32e..f59ac83 100644 --- a/tar-eio.opam +++ b/tar-eio.opam @@ -22,7 +22,7 @@ bug-reports: "https://github.com/mirage/ocaml-tar/issues" depends: [ "dune" {>= "2.9"} "ocaml" {>= "5.00.0"} - "eio" {>= "0.10.0" & < "0.12"} + "eio" {>= "1.1" & < "1.2"} "tar" {= version} "odoc" {with-doc} ] From 724aa17094821ba51656c884df463c247528dd3c Mon Sep 17 00:00:00 2001 From: Calascibetta Romain Date: Wed, 31 Jul 2024 12:15:46 +0200 Subject: [PATCH 32/34] Delete useless assert false in tar_gz.ml implementation --- lib/tar_gz.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/tar_gz.ml b/lib/tar_gz.ml index 56debe1..0633288 100644 --- a/lib/tar_gz.ml +++ b/lib/tar_gz.ml @@ -113,13 +113,13 @@ let in_gzipped t = = fun decoder -> function | Tar.Really_read len -> really_read_through_gz decoder len - | Tar.Read _len -> assert false (* XXX(dinosaure): actually does not emit [Tar.Read]. *) + | Tar.Read _len as v -> v | Tar.Seek len -> seek_through_gz decoder len | Tar.Return _ as ret -> ret | Tar.Bind (x, f) -> Tar.Bind (go decoder x, (fun x -> go decoder (f x))) | Tar.High _ as high -> high - | Tar.Write _ -> assert false in + | Tar.Write _ as v -> v in let decoder = let oc_buffer = De.bigstring_create 0x1000 in { gz= Gz.Inf.decoder `Manual ~o:oc_buffer From da4b1eb9fb903b3e6641b09e712156bd4a826f84 Mon Sep 17 00:00:00 2001 From: Calascibetta Romain Date: Wed, 31 Jul 2024 12:16:00 +0200 Subject: [PATCH 33/34] Also update the dune-project which generes opam files --- dune-project | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dune-project b/dune-project index 480654e..ab04d55 100644 --- a/dune-project +++ b/dune-project @@ -89,7 +89,7 @@ (tags ("org:xapi-project" "org:mirage")) (depends (ocaml (>= 5.00.0)) - (eio (and (>= 0.10.0) (< 0.12))) + (eio (and (>= 1.1) (< 1.2))) (tar (= :version)) ) ) From 4215ff02d87486ade54e1a3ede43cce476f791cf Mon Sep 17 00:00:00 2001 From: Calascibetta Romain Date: Thu, 1 Aug 2024 13:38:27 +0200 Subject: [PATCH 34/34] Fix the Tar_gz implementation --- lib/tar_gz.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/tar_gz.ml b/lib/tar_gz.ml index 0633288..26fc22a 100644 --- a/lib/tar_gz.ml +++ b/lib/tar_gz.ml @@ -141,7 +141,7 @@ let rec until_await oc_pos oc_buffer = function let max = De.bigstring_length oc_buffer - Gz.Def.dst_rem gz - oc_pos in let len = min 0x100 max in let res = Bytes.create len in - bigstring_blit_bytes oc_buffer ~src_off:0 res ~dst_off:0 ~len; + bigstring_blit_bytes oc_buffer ~src_off:oc_pos res ~dst_off:0 ~len; let* () = Tar.write (Bytes.unsafe_to_string res) in if len > 0 then until_await (oc_pos + len) oc_buffer state else @@ -157,7 +157,7 @@ let rec until_end oc_pos oc_buffer = function let max = De.bigstring_length oc_buffer - Gz.Def.dst_rem gz - oc_pos in let len = min 0x100 max in let res = Bytes.create len in - bigstring_blit_bytes oc_buffer ~src_off:0 res ~dst_off:0 ~len; + bigstring_blit_bytes oc_buffer ~src_off:oc_pos res ~dst_off:0 ~len; let* () = Tar.write (Bytes.unsafe_to_string res) in if len > 0 then until_end (oc_pos + len) oc_buffer state else match state with