diff --git a/.gitignore b/.gitignore index 1cb352b..74c3ab3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ .vscode/ /obelisk /*.html +/*.htm /*.gz /cookies.txt \ No newline at end of file diff --git a/README.md b/README.md index 9819590..aed4d36 100644 --- a/README.md +++ b/README.md @@ -93,7 +93,8 @@ There are some CLI behavior that I think need to be explained more here : .developers.google.com TRUE / FALSE 1642167486 KEY VALUE ``` -- If `--output` flag is not specified and there is only one URL to process (either from input file or from CLI arguments) then the default output will be `stdout`. However, if there are more than one URL then Obelisk will generate file name for the archive. +- If the `--output` flag is not specified, Obelisk will generate a file name for the archive and save it in the current working directory. +- If the `--output` flag is set to `-` and there is only one URL to process (either from the input file or from CLI arguments), the archive will be written to `stdout`. - If `--output` flag is specified but there are more than one URL to process, Obelisk will generate file name for the archive, but keep using the directory from the specified output path. - If `--output` flag is specified but it sets to an existing directory, Obelisk will also generate file name for the archive. 
diff --git a/cmd/obelisk/main.go b/cmd/obelisk/main.go index 4515a3f..979e07b 100644 --- a/cmd/obelisk/main.go +++ b/cmd/obelisk/main.go @@ -9,6 +9,7 @@ import ( nurl "net/url" "os" fp "path/filepath" + "strings" "time" "github.com/go-shiori/obelisk" @@ -53,7 +54,6 @@ func cmdHandler(cmd *cobra.Command, args []string) error { // Parse flags inputPath, _ := cmd.Flags().GetString("input") outputPath, _ := cmd.Flags().GetString("output") - outputSpecified := cmd.Flags().Changed("output") cookiesFilePath, _ := cmd.Flags().GetString("load-cookies") userAgent, _ := cmd.Flags().GetString("user-agent") @@ -83,10 +83,12 @@ func cmdHandler(cmd *cobra.Command, args []string) error { return fmt.Errorf("no url to process") } - // Prepare output name and dir + // Prepare output target outputDir := "" outputFileName := "" - if outputSpecified { + useStdout := outputPath == "-" && len(urls) == 1 + + if outputPath != "" && !useStdout { if isDirectory(outputPath) { outputDir = outputPath } else { @@ -145,11 +147,12 @@ func cmdHandler(cmd *cobra.Command, args []string) error { // Create request var reqCookies []*http.Cookie if len(cookiesMap) != 0 { - hostName := url.Hostname() - domainName := getDomainName(hostName) - reqCookies = append(reqCookies, cookiesMap[hostName]...) - reqCookies = append(reqCookies, cookiesMap["."+hostName]...) - reqCookies = append(reqCookies, cookiesMap["."+domainName]...) + parts := strings.Split(url.Hostname(), ".") + for i := 0; i < len(parts)-1; i++ { + domainName := strings.Join(parts[i:], ".") + reqCookies = append(reqCookies, cookiesMap[domainName]...) + reqCookies = append(reqCookies, cookiesMap["."+domainName]...) 
+ } } req := obelisk.Request{ @@ -169,7 +172,7 @@ func cmdHandler(cmd *cobra.Command, args []string) error { // Prepare output var output io.Writer - if len(urls) == 1 && !outputSpecified { + if useStdout { output = os.Stdout } else { fileName := outputFileName diff --git a/cmd/obelisk/utils.go b/cmd/obelisk/utils.go index 9a2645c..e3b6f02 100644 --- a/cmd/obelisk/utils.go +++ b/cmd/obelisk/utils.go @@ -78,19 +78,10 @@ func parseCookiesFile(path string) (map[string][]*http.Cookie, error) { return mapCookies, nil } -func getDomainName(hostname string) string { - parts := strings.Split(hostname, ".") - if len(parts) <= 2 { - return hostname - } - - return strings.Join(parts[len(parts)-2:], ".") -} - func createFileName(url *nurl.URL, contentType string) string { // Prepare current time and domain name now := time.Now().Format("2006-01-01-150405") - domainName := getDomainName(url.Hostname()) + domainName := strings.TrimPrefix(url.Hostname(), "www.") domainName = strings.ReplaceAll(domainName, ".", "-") // Get file extension @@ -105,8 +96,8 @@ func createFileName(url *nurl.URL, contentType string) string { } baseName := pth.Base(url.Path) - if parts := strings.Split(baseName, "-"); len(parts) > 5 { - baseName = strings.Join(parts[:5], "-") + if parts := strings.Split(baseName, "-"); len(parts) > 4 { + baseName = strings.Join(parts[:4], "-") } return fmt.Sprintf("%s-%s-%s%s", now, domainName, baseName, extension)