Skip to content

Commit

Permalink
Update: Tar/Unzip
Browse files Browse the repository at this point in the history
* Implement `-u` short hand for `--unzip`
* `--unzip` option for invoking the unzip consumer added
* multifile mode utilizes `--unzip/-u` and `--extract/-x` for tar and
  unzip modes

* Improved Debugging logs for tar and unzip
* Update README
  • Loading branch information
tempusfrangit committed Feb 19, 2024
1 parent 27a5054 commit c21731c
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 20 deletions.
22 changes: 14 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,6 @@ This builds a static binary that can work inside containers.
- -c concurrency: The number of concurrent downloads. Default is 4 times the number of cores.
- -x: Extract the tar file after download. If not set, the downloaded file will be saved as is.

#### Default-Mode Command-Line Options
- `-x`, `--extract`
- Extract archive after download
- Type: `bool`
- Default: `false`

#### Example

Expand Down Expand Up @@ -101,9 +96,9 @@ https://example.com/music.mp3 /local/path/to/music.mp3

### Global Command-Line Options
- `--max-chunks`
- Maximum number of chunks for downloading a given file
- Type: `Integer`
- Default: `4 * runtime.NumCPU()`
- Maximum number of chunks for downloading a given file
- Type: `Integer`
- Default: `4 * runtime.NumCPU()`
- `--connect-timeout`
- Timeout for establishing a connection, format is <number><unit>, e.g. 10s
- Type: `Duration`
Expand Down Expand Up @@ -131,6 +126,17 @@ https://example.com/music.mp3 /local/path/to/music.mp3
- Verbose mode (equivalent to `--log-level debug`)
- Type: `bool`
- Default: `false`
- `-x`, `--extract`
- Extract archive after download
- Type: `bool`
- Default: `false`
- In multifile mode this option will only extract tar files where `content-type` header is `application/x-tar`. This option may be combined with `--unzip` only in multifile mode.
- `-u`, `--unzip`
- Unzip archive after download
- Type: `bool`
- Default: `false`
- In multifile mode this option will only extract zip files where the `content-type` header is `application/zip`. This option may be combined with `--extract` only in multifile mode.


#### Deprecated
- `--concurrency` (deprecated, use `--max-chunks` instead)
Expand Down
12 changes: 2 additions & 10 deletions cmd/multifile/multifile.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,6 @@ const multifileExamples = `
cat multifile.txt | pget multifile -
`

const (
OptUnzip = "unzip"
OptTarExtract = "tar"
)

// test seam
type Getter interface {
DownloadFile(ctx context.Context, url string, dest string) (int64, time.Duration, error)
Expand All @@ -58,9 +53,6 @@ func GetCommand() *cobra.Command {
Example: multifileExamples,
}

cmd.Flags().BoolP(OptUnzip, "u", false, "Extract .zip files in multifile mode")
cmd.Flags().BoolP(OptTarExtract, "t", false, "Extract .tar files in multifile mode")

err := viper.BindPFlags(cmd.PersistentFlags())
if err != nil {
fmt.Println(err)
Expand Down Expand Up @@ -139,14 +131,14 @@ func multifileExecute(ctx context.Context, manifest pget.Manifest) error {
}

// Handle zip extraction if unzip flag is set
if viper.GetBool(OptUnzip) {
if viper.GetBool(config.OptUnzip) {
if err := consumer.addConsumer("application/zip", config.ConsumerZipExtractor); err != nil {
return err
}
}

// Handle tar extraction if tar flag is set
if viper.GetBool(OptUnzip) {
if viper.GetBool(config.OptUnzip) {
if err := consumer.addConsumer("application/x-tar", config.ConsumerTarExtractor); err != nil {
return err
}
Expand Down
14 changes: 13 additions & 1 deletion cmd/root/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ func GetCommand() *cobra.Command {
Long: rootLongDesc,
PersistentPreRunE: rootPersistentPreRunEFunc,
PersistentPostRunE: rootPersistentPostRunEFunc,
PreRunE: rootPreRunEFunc,
PreRun: rootCmdPreRun,
RunE: runRootCMD,
Args: cobra.ExactArgs(2),
Example: ` pget https://example.com/file.tar ./target-dir`,
}
cmd.Flags().BoolP(config.OptExtract, "x", false, "OptExtract archive after download")
cmd.SetUsageTemplate(cli.UsageTemplate)
config.ViperInit()
if err := persistentFlags(cmd); err != nil {
Expand Down Expand Up @@ -119,6 +119,13 @@ func rootPersistentPostRunEFunc(cmd *cobra.Command, args []string) error {
return nil
}

// rootPreRunEFunc validates the archive-handling options before the command
// runs. It returns an error when both the extract and unzip options are
// enabled in viper; otherwise it returns nil. The cmd and args parameters
// are not used.
func rootPreRunEFunc(cmd *cobra.Command, args []string) error {
	// Nothing to validate unless tar extraction was requested.
	if !viper.GetBool(config.OptExtract) {
		return nil
	}
	// Extraction alone is fine; only the combination with unzip is rejected.
	if !viper.GetBool(config.OptUnzip) {
		return nil
	}
	return fmt.Errorf("cannot use --unzip and --extract together")
}

func persistentFlags(cmd *cobra.Command) error {
// Persistent Flags (applies to all commands/subcommands)
cmd.PersistentFlags().IntVarP(&concurrency, config.OptConcurrency, "c", runtime.GOMAXPROCS(0)*4, "Maximum number of concurrent downloads/maximum number of chunks for a given file")
Expand All @@ -134,6 +141,8 @@ func persistentFlags(cmd *cobra.Command) error {
cmd.PersistentFlags().Int(config.OptMaxConnPerHost, 40, "Maximum number of (global) concurrent connections per host")
cmd.PersistentFlags().StringP(config.OptOutputConsumer, "o", "file", "Output Consumer (file, tar, null)")
cmd.PersistentFlags().String(config.OptPIDFile, defaultPidFilePath(), "PID file path")
cmd.PersistentFlags().BoolP(config.OptExtract, "x", false, "Extract tar archive after download")
cmd.PersistentFlags().BoolP(config.OptUnzip, "u", false, "Unzip archive after download")

if err := config.AddFlagAlias(cmd, config.OptConcurrency, config.OptMaxChunks); err != nil {
return err
Expand Down Expand Up @@ -169,6 +178,9 @@ func rootCmdPreRun(cmd *cobra.Command, args []string) {
if currentConsumer != config.ConsumerFile && currentConsumer != config.ConsumerTarExtractor {
log.Warn().Msg("Tar Extract Enabled, overriding output consumer to `tar-extractor`")
}
if currentConsumer != config.ConsumerFile && currentConsumer != config.ConsumerZipExtractor {
log.Warn().Msg("Unzip Enabled, overriding output consumer to `unzip`")
}
viper.Set(config.OptOutputConsumer, config.ConsumerTarExtractor)
}
}
Expand Down
1 change: 1 addition & 0 deletions pkg/config/optnames.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,6 @@ const (
OptPIDFile = "pid-file"
OptResolve = "resolve"
OptRetries = "retries"
OptUnzip = "unzip"
OptVerbose = "verbose"
)
23 changes: 23 additions & 0 deletions pkg/extract/tar.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ func TarFile(reader io.Reader, destDir string, overwrite bool) error {
logger.Debug().
Str("extractor", "tar").
Str("status", "starting").
Bool("overwrite", overwrite).
Str("destDir", destDir).
Msg("Extract")
for {
header, err := tarReader.Next()
Expand All @@ -45,6 +47,10 @@ func TarFile(reader io.Reader, destDir string, overwrite bool) error {

switch header.Typeflag {
case tar.TypeDir:
logger.Debug().
Str("target", target).
Str("perms", fmt.Sprintf("%o", header.Mode)).
Msg("Tar: Directory")
if err := os.MkdirAll(target, os.FileMode(header.Mode)); err != nil {
return err
}
Expand All @@ -53,6 +59,10 @@ func TarFile(reader io.Reader, destDir string, overwrite bool) error {
if overwrite {
openFlags |= os.O_TRUNC
}
logger.Debug().
Str("target", target).
Str("perms", fmt.Sprintf("%o", header.Mode)).
Msg("Tar: File")
targetFile, err := os.OpenFile(target, openFlags, os.FileMode(header.Mode))
if err != nil {
return err
Expand All @@ -66,6 +76,10 @@ func TarFile(reader io.Reader, destDir string, overwrite bool) error {
}
case tar.TypeSymlink, tar.TypeLink:
// Defer creation of
logger.Debug().Str("link_type", string(header.Typeflag)).
Str("old_name", header.Linkname).
Str("new_name", target).
Msg("Tar: (Defer) Link")
links = append(links, &link{linkType: header.Typeflag, oldName: header.Linkname, newName: target})
default:
return fmt.Errorf("unsupported file type for %s, typeflag %s", header.Name, string(header.Typeflag))
Expand All @@ -86,6 +100,7 @@ func TarFile(reader io.Reader, destDir string, overwrite bool) error {
}

func createLinks(links []*link, destDir string, overwrite bool) error {
logger := logging.GetLogger()
for _, link := range links {
targetDir := filepath.Dir(link.newName)
if err := os.MkdirAll(targetDir, 0755); err != nil {
Expand All @@ -94,10 +109,18 @@ func createLinks(links []*link, destDir string, overwrite bool) error {
switch link.linkType {
case tar.TypeLink:
oldPath := filepath.Join(destDir, link.oldName)
logger.Debug().
Str("old_path", oldPath).
Str("new_path", link.newName).
Msg("Tar: creating hard link")
if err := createHardLink(oldPath, link.newName, overwrite); err != nil {
return fmt.Errorf("error creating hard link from %s to %s: %w", oldPath, link.newName, err)
}
case tar.TypeSymlink:
logger.Debug().
Str("old_path", link.oldName).
Str("new_path", link.newName).
Msg("Tar: creating symlink")
if err := createSymlink(link.oldName, link.newName, overwrite); err != nil {
return fmt.Errorf("error creating symlink from %s to %s: %w", link.oldName, link.newName, err)
}
Expand Down
16 changes: 15 additions & 1 deletion pkg/extract/zip.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,24 @@ import (
"os"
"path"
"path/filepath"

"github.com/replicate/pget/pkg/logging"
)

// ZipFile extracts a zip file to the given destination path.
func ZipFile(reader io.ReaderAt, destPath string, size int64, overwrite bool) error {
logger := logging.GetLogger()
err := os.MkdirAll(destPath, 0755)
if err != nil {
return fmt.Errorf("error creating destination directory: %w", err)
}

logger.Debug().
Str("extractor", "zip").
Str("status", "starting").
Bool("overwrite", overwrite).
Str("destDir", destPath).
Msg("Extract")
zipReader, err := zip.NewReader(reader, size)
if err != nil {
return fmt.Errorf("error creating zip reader: %w", err)
Expand All @@ -42,8 +51,11 @@ func handleFileFromZip(file *zip.File, outputDir string, overwrite bool) error {
}

func extractDir(file *zip.File, outputDir string) error {
logger := logging.GetLogger()
target := path.Join(outputDir, file.Name)
// Strip setuid/setgid/sticky bits
perms := file.Mode().Perm() &^ os.ModeSetuid &^ os.ModeSetgid &^ os.ModeSticky
logger.Debug().Str("target", target).Str("perms", fmt.Sprintf("%o", perms)).Msg("Unzip: directory")
info, err := os.Stat(target)
if err == nil && !info.IsDir() {
return fmt.Errorf("error creating directory: %s already exists and is not a directory", target)
Expand All @@ -66,6 +78,7 @@ func extractDir(file *zip.File, outputDir string) error {
}

func extractFile(file *zip.File, outputDir string, overwrite bool) error {
logger := logging.GetLogger()
target := path.Join(outputDir, file.Name)
targetDir := filepath.Dir(target)
err := os.MkdirAll(targetDir, 0755)
Expand All @@ -85,8 +98,9 @@ func extractFile(file *zip.File, outputDir string, overwrite bool) error {
if overwrite {
openFlags |= os.O_TRUNC
}
// Do not apply setuid/gid/sticky bits.
// Strip setuid/gid/sticky bits.
perms := file.Mode().Perm() &^ os.ModeSetuid &^ os.ModeSetgid &^ os.ModeSticky
logger.Debug().Str("target", target).Str("perms", fmt.Sprintf("%o", perms)).Msg("Unzip: file")
out, err := os.OpenFile(target, openFlags, perms)
if err != nil {
return fmt.Errorf("error creating file: %w", err)
Expand Down

0 comments on commit c21731c

Please sign in to comment.