Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added organize feature #64

Merged
merged 4 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 75 additions & 0 deletions internal/cli/organize.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/

package cli

import (
"errors"
"fmt"

"github.com/spf13/cobra"
"github.com/unidoc/unipdf-cli/pkg/pdf"
)

// organizeCmdDesc is the long help text displayed for the organize command.
const organizeCmdDesc = `Organize PDF files.

The command is used to organize one or more page ranges from the input file
and save the result as the output file.
If no page range is specified, all the pages from the input file will be
copied to the output file.

An example of the pages parameter: 1-3,4,6-7
Only pages 1,2,3 (1-3), 4 and 6,7 (6-7) will be present in the output file,
while page number 5 is skipped.
`

// organizeCmdExample holds the usage examples displayed in the help output
// of the organize command, one example per line.
var organizeCmdExample = fmt.Sprintf("%s\n%s\n",
	fmt.Sprintf("%s organize input_file.pdf output_file.pdf 1-2", appName),
	fmt.Sprintf("%s organize -p pass input_file.pdf output_file.pdf 1-2,4", appName),
)

// organizeCmd represents the organize command. It expects the input and
// output file paths as its first two positional arguments, optionally
// followed by a page range, and accepts the input file password through
// the "password" flag.
var organizeCmd = &cobra.Command{
	Use:                   "organize [FLAG]... INPUT_FILE OUTPUT_FILE [PAGES]",
	Short:                 "Organize PDF files",
	Long:                  organizeCmdDesc,
	Example:               organizeCmdExample,
	DisableFlagsInUseLine: true,
	// Args rejects invocations lacking the two mandatory file paths.
	Args: func(_ *cobra.Command, args []string) error {
		if len(args) >= 2 {
			return nil
		}

		return errors.New("must provide at least the input and output files")
	},
	Run: func(cmd *cobra.Command, args []string) {
		src, dst := args[0], args[1]
		password, _ := cmd.Flags().GetString("password")

		// The page range is optional; it is parsed only when a third
		// positional argument is present.
		var pages []int
		if len(args) > 2 {
			parsed, parseErr := parsePageRangeUnsorted(args[2])
			if parseErr != nil {
				printUsageErr(cmd, "Invalid page range specified\n")
			}
			pages = parsed
		}

		organizeErr := pdf.Organize(src, dst, password, pages)
		if organizeErr != nil {
			printErr("Error: %s\n", organizeErr)
		}

		fmt.Printf("Successfully organized file %s\n", src)
		fmt.Printf("Output file saved to %s\n", dst)
	},
}

func init() {
rootCmd.AddCommand(organizeCmd)

organizeCmd.Flags().StringP("password", "p", "", "input file password")
}
56 changes: 55 additions & 1 deletion internal/cli/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ func parsePageRange(pageRange string) ([]int, error) {
}

indices := strings.Split(rng, "-")

lenIndices := len(indices)
if lenIndices > 2 {
return nil, errors.New("invalid page range")
Expand Down Expand Up @@ -86,6 +85,61 @@ func parsePageRange(pageRange string) ([]int, error) {
return pages, nil
}

// parsePageRangeUnsorted parses a comma separated list of page numbers and
// inclusive page ranges (e.g. "4,1-3,7") into a list of page numbers.
// Pages are collected in the order in which they appear in pageRange; no
// sorting is performed. Spaces are stripped via removeSpaces and empty list
// items are ignored. The collected list is passed through uniqueIntSlice
// before being returned (presumably to drop duplicate entries; confirm
// against that helper).
//
// An error is returned if an item is not a valid number or range, if any
// page number is lower than 1, or if a range start exceeds its end.
func parsePageRangeUnsorted(pageRange string) ([]int, error) {
	var pages []int

	rngs := strings.Split(removeSpaces(pageRange), ",")
	for _, rng := range rngs {
		if rng == "" {
			continue
		}

		indices := strings.Split(rng, "-")
		lenIndices := len(indices)
		if lenIndices > 2 {
			return nil, errors.New("invalid page range")
		}
		if lenIndices == 2 {
			start, err := strconv.Atoi(indices[0])
			if err != nil {
				return nil, errors.New("invalid start page number")
			}
			if start < 1 {
				return nil, errors.New("page range start must be greater than 0")
			}

			end, err := strconv.Atoi(indices[1])
			if err != nil {
				return nil, errors.New("invalid end page number")
			}
			if end < 1 {
				return nil, errors.New("page range end must be greater than 0")
			}

			if start > end {
				return nil, errors.New("page range end must be greater than the start")
			}

			for page := start; page <= end; page++ {
				pages = append(pages, page)
			}

			continue
		}

		page, err := strconv.Atoi(indices[0])
		if err != nil {
			return nil, errors.New("invalid page number")
		}
		// Page numbers are 1-based: apply the same lower bound to single
		// pages as the one enforced on range bounds above.
		if page < 1 {
			return nil, errors.New("page number must be greater than 0")
		}

		pages = append(pages, page)
	}

	pages = uniqueIntSlice(pages)

	return pages, nil
}

func parseInputPaths(inputPaths []string, recursive bool, matcher fileMatcher) ([]string, error) {
var err error
var files []string
Expand Down
152 changes: 152 additions & 0 deletions pkg/pdf/organize.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/

package pdf

import (
"github.com/unidoc/unipdf/v3/common"
unipdf "github.com/unidoc/unipdf/v3/model"
)

// Organize extracts the provided page list from the PDF file specified by
// the inputPath parameter, adds the pages to a new document in the order in
// which they appear in the pages slice and saves the resulting file at the
// location specified by the outputPath parameter.
// Page numbers are 1-based. If the pages slice is empty, all the pages of
// the input file are copied to the output file, in document order.
// A password can be passed in for encrypted input files.
//
// Besides the pages, document level properties (version, info, catalog
// metadata, forms, outlines, etc.) are copied over from the input file.
// Failures to copy some of the optional properties are only logged at debug
// level and do not abort the operation.
func Organize(inputPath, outputPath, password string, pages []int) error {
	// Read input file.
	pdfReader, _, _, _, err := readPDF(inputPath, password)
	if err != nil {
		return err
	}

	// An empty page list selects every page of the input file. A new slice
	// is built so that the caller's slice is never modified.
	if len(pages) == 0 {
		numPages, err := pdfReader.GetNumPages()
		if err != nil {
			return err
		}

		pages = make([]int, 0, numPages)
		for pageNum := 1; pageNum <= numPages; pageNum++ {
			pages = append(pages, pageNum)
		}
	}

	// Add selected pages to the writer, preserving the requested order.
	pdfWriter := unipdf.NewPdfWriter()

	for _, pageNum := range pages {
		page, err := pdfReader.GetPage(pageNum)
		if err != nil {
			return err
		}

		err = pdfWriter.AddPage(page)
		if err != nil {
			return err
		}
	}

	// Copy PDF version.
	version := pdfReader.PdfVersion()
	pdfWriter.SetVersion(version.Major, version.Minor)

	// Copy PDF info.
	info, err := pdfReader.GetPdfInfo()
	if err != nil {
		common.Log.Debug("ERROR: %v", err)
	} else {
		pdfWriter.SetDocInfo(info)
	}

	// Copy Catalog Metadata.
	if meta, ok := pdfReader.GetCatalogMetadata(); ok {
		if err := pdfWriter.SetCatalogMetadata(meta); err != nil {
			return err
		}
	}

	// Copy catalog mark information.
	if markInfo, ok := pdfReader.GetCatalogMarkInfo(); ok {
		if err := pdfWriter.SetCatalogMarkInfo(markInfo); err != nil {
			return err
		}
	}

	// Copy AcroForm.
	err = pdfWriter.SetForms(pdfReader.AcroForm)
	if err != nil {
		common.Log.Debug("ERROR: %v", err)
		return err
	}

	// Copy viewer preferences.
	if pref, ok := pdfReader.GetCatalogViewerPreferences(); ok {
		if err := pdfWriter.SetCatalogViewerPreferences(pref); err != nil {
			return err
		}
	}

	// Copy language preferences.
	if lang, ok := pdfReader.GetCatalogLanguage(); ok {
		if err := pdfWriter.SetCatalogLanguage(lang); err != nil {
			return err
		}
	}

	// Copy document outlines.
	pdfWriter.AddOutlineTree(pdfReader.GetOutlineTree())

	// Copy OC Properties.
	props, err := pdfReader.GetOCProperties()
	if err != nil {
		common.Log.Debug("ERROR: %v", err)
	} else {
		err = pdfWriter.SetOCProperties(props)
		if err != nil {
			common.Log.Debug("ERROR: %v", err)
		}
	}

	// Copy page labels.
	labelObj, err := pdfReader.GetPageLabels()
	if err != nil {
		common.Log.Debug("ERROR: %v", err)
	} else {
		err = pdfWriter.SetPageLabels(labelObj)
		if err != nil {
			common.Log.Debug("ERROR: %v", err)
		}
	}

	// Copy named destinations.
	namedDest, err := pdfReader.GetNamedDestinations()
	if err != nil {
		common.Log.Debug("ERROR: %v", err)
	} else {
		err = pdfWriter.SetNamedDestinations(namedDest)
		if err != nil {
			common.Log.Debug("ERROR: %v", err)
		}
	}

	// Copy name dictionary.
	nameDict, err := pdfReader.GetNameDictionary()
	if err != nil {
		common.Log.Debug("ERROR: %v", err)
	} else {
		err = pdfWriter.SetNameDictionary(nameDict)
		if err != nil {
			common.Log.Debug("ERROR: %v", err)
		}
	}

	// Copy StructTreeRoot dictionary.
	structTreeRoot, found := pdfReader.GetCatalogStructTreeRoot()
	if found {
		err := pdfWriter.SetCatalogStructTreeRoot(structTreeRoot)
		if err != nil {
			common.Log.Debug("ERROR: %v", err)
		}
	}

	// Copy global page rotation.
	if pdfReader.Rotate != nil {
		if err := pdfWriter.SetRotation(*pdfReader.Rotate); err != nil {
			common.Log.Debug("ERROR: %v", err)
		}
	}

	// Write output file. The safe flag is set when the output overwrites
	// the input; NOTE(review): presumably writePDF then writes through a
	// temporary file - confirm against its implementation.
	safe := inputPath == outputPath
	return writePDF(outputPath, &pdfWriter, safe)
}
Loading