Merge pull request #46 from lukasmartinelli/travis
Fix travis build
lukasmartinelli authored Jul 7, 2018
2 parents d8c60d5 + 3ed0890 commit fd8adf8
Showing 7 changed files with 100 additions and 154 deletions.
4 changes: 3 additions & 1 deletion .travis.yml
@@ -3,7 +3,8 @@ sudo: false
addons:
postgresql: 9.4
go:
- 1.4
- 1.8
- 1.9
- tip
cache:
directories:
@@ -13,6 +14,7 @@ install:
- go get github.com/lib/pq
- go get github.com/kennygrant/sanitize
- go get github.com/cheggaaa/pb
- go get github.com/JensRantil/go-csv
- ./download_samples.sh
script:
- go install && ./test.sh
15 changes: 4 additions & 11 deletions README.md
@@ -2,7 +2,7 @@

<img align="right" alt="elephant" src="elephant.jpg" />

Import CSV and JSON into PostgreSQL the easy way.
Import CSV (RFC 4180) and JSON into PostgreSQL the easy way.
This small tool abstracts all the hassles and swearing you normally
have to deal with when you just want to dump some data into the database.

@@ -13,6 +13,7 @@ Features:
- Easy deployment
- Dealing with import errors
- Import over the network
- Only supports UTF8 encoding

> Check out [pgclimb](https://github.com/lukasmartinelli/pgclimb) for exporting data from PostgreSQL into different data formats.
@@ -232,15 +233,6 @@ This works the same for invalid JSON objects.
pgfutter csv --table violations traffic_violations.csv
```

### Import single JSON object

Instead of using JSON lines you can also [import a single JSON object](https://github.com/lukasmartinelli/pgfutter/issues/9)
into the database. This will load the JSON document into memory first.

```bash
pgfutter jsonobj document.json
```

## Alternatives

For more sophisticated needs you should take a look at [pgloader](http://pgloader.io).
@@ -267,5 +259,6 @@ We use [gox](https://github.com/mitchellh/gox) to create distributable
binaries for Windows, OSX and Linux.

```bash
docker run --rm -v "$(pwd)":/usr/src/pgfutter -w /usr/src/pgfutter tcnksm/gox:1.4.2-light
docker run --rm -v "$(pwd)":/usr/src/pgfutter -w /usr/src/pgfutter tcnksm/gox:1.9

```
26 changes: 17 additions & 9 deletions csv.go
@@ -1,7 +1,6 @@
package main

import (
"encoding/csv"
"errors"
"fmt"
"io"
@@ -10,6 +9,7 @@ import (
"unicode/utf8"

"github.com/cheggaaa/pb"
csv "github.com/JensRantil/go-csv"
)

func containsDelimiter(col string) bool {
@@ -48,7 +48,9 @@ func parseColumns(reader *csv.Reader, skipHeader bool, fields string) ([]string,
}
} else {
columns, err = reader.Read()
fmt.Printf("%v columns\n%v\n", len(columns), columns)
if err != nil {
fmt.Printf("FOUND ERR\n")
return nil, err
}
}
@@ -120,14 +122,25 @@ func copyCSVRows(i *Import, reader *csv.Reader, ignoreErrors bool, delimiter str
return nil, success, failed
}

func importCSV(filename string, connStr string, schema string, tableName string, ignoreErrors bool, skipHeader bool, fields string, delimiter string) error {
func importCSV(filename string, connStr string, schema string, tableName string, ignoreErrors bool, skipHeader bool, fields string, delimiter string, excel bool) error {

db, err := connect(connStr, schema)
if err != nil {
return err
}
defer db.Close()

dialect := csv.Dialect{}
dialect.Delimiter, _ = utf8.DecodeRuneInString(delimiter)

// Excel 2008 and 2011 (and possibly other versions) use a carriage return \r
// rather than a line feed \n as a newline
if excel {
dialect.LineTerminator = "\r"
} else {
dialect.LineTerminator = "\n"
}

var reader *csv.Reader
var bar *pb.ProgressBar
if filename != "" {
@@ -138,21 +151,16 @@ func importCSV(filename string, connStr string, schema string, tableName string,
defer file.Close()

bar = NewProgressBar(file)
reader = csv.NewReader(io.TeeReader(file, bar))
reader = csv.NewDialectReader(io.TeeReader(file, bar), dialect)
} else {
reader = csv.NewReader(os.Stdin)
reader = csv.NewDialectReader(os.Stdin, dialect)
}

reader.Comma, _ = utf8.DecodeRuneInString(delimiter)
reader.LazyQuotes = true

columns, err := parseColumns(reader, skipHeader, fields)
if err != nil {
return err
}

reader.FieldsPerRecord = len(columns)

i, err := NewCSVImport(db, schema, tableName, columns)
if err != nil {
return err
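For context on the csv.go change: swapping the standard library's encoding/csv for github.com/JensRantil/go-csv is what makes the line terminator configurable. Below is a minimal, self-contained sketch that mirrors the dialect-based reader calls used above; the file name is hypothetical, and it assumes the go-csv reader follows encoding/csv's convention of returning io.EOF at end of input.

```go
package main

import (
	"fmt"
	"io"
	"log"
	"os"

	csv "github.com/JensRantil/go-csv"
)

func main() {
	// Hypothetical sample file; any CSV exported from Excel on a Mac would do.
	file, err := os.Open("sales_mac_export.csv")
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	// Same dialect settings as importCSV above: comma-delimited records,
	// carriage-return line endings as written by Excel 2008/2011.
	dialect := csv.Dialect{}
	dialect.Delimiter = ','
	dialect.LineTerminator = "\r"

	reader := csv.NewDialectReader(file, dialect)
	for {
		// Assumes io.EOF signals end of input, as with encoding/csv.
		record, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatal(err)
		}
		fmt.Println(record)
	}
}
```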
32 changes: 15 additions & 17 deletions download_samples.sh
@@ -5,30 +5,28 @@ SAMPLES_DIR="$CWD/samples"
function download_json_samples() {
mkdir -p $SAMPLES_DIR
cd $SAMPLES_DIR
wget -nc http://data.githubarchive.org/2015-01-01-15.json.gz && gunzip -f 2015-01-01-15.json.gz
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/json_sample_2015-01-01-15.json
cd $CWD
}

function download_csv_samples() {
mkdir -p $SAMPLES_DIR
cd $SAMPLES_DIR
wget -nc -O local_severe_wheather_warning_systems.csv https://data.mo.gov/api/views/n59h-ggai/rows.csv
wget -nc -O montgomery_crime.csv https://data.montgomerycountymd.gov/api/views/icn6-v9z3/rows.csv
wget -nc -O employee_salaries.csv https://data.montgomerycountymd.gov/api/views/54rh-89p8/rows.csv
wget -nc -O residential_permits.csv https://data.montgomerycountymd.gov/api/views/m88u-pqki/rows.csv
wget -nc -O customer_complaints.csv https://data.consumerfinance.gov/api/views/x94z-ydhh/rows.csv
wget -nc -O traffic_violations.csv https://data.montgomerycountymd.gov/api/views/4mse-ku6q/rows.csv
wget -nc -O distribution_of_wealth_switzerland.csv http://bar-opendata-ch.s3.amazonaws.com/Kanton-ZH/Statistik/Distribution_of_wealth.csv
wget -nc http://bar-opendata-ch.s3.amazonaws.com/Kanton-ZH/Statistik/Wealth_groups.csv
wget -nc http://bar-opendata-ch.s3.amazonaws.com/Kanton-ZH/Statistik/Vermoegensklassen.csv
wget -nc http://bar-opendata-ch.s3.amazonaws.com/Kanton-ZH/Statistik/Steuertarife.csv
wget -nc http://bar-opendata-ch.s3.amazonaws.com/Kanton-ZH/Statistik/Tax_rates.csv
wget -nc -O whitehouse_visits_2014.zip https://www.whitehouse.gov/sites/default/files/disclosures/whitehouse_waves-2014_12.csv_.zip && unzip -o whitehouse_visits_2014.zip && rm -f whitehouse_visits_2014.csv && mv whitehouse_waves-2014_12.csv.csv whitehouse_visits_2014.csv
wget -nc http://bar-opendata-ch.s3.amazonaws.com/ch.bag/Spitalstatistikdateien/qip/2012/qip12_tabdaten.csv
wget -nc http://bar-opendata-ch.s3.amazonaws.com/ch.bar.bar-02/Metadatenbank-Vernehmlassungen-OGD-V1-3.csv
wget -nc https://www.data.gov/app/uploads/2015/08/opendatasites.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_distribution_of_wealth_switzerland.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_employee_salaries.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_local_severe_wheather_warning_systems.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_montgomery_crime.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_qip12_tabdaten.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_residential_permits.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_sacramentocrime_jan_2006.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_sacramento_realestate_transactions.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_sales_jan_2009.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_steuertarife.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_techcrunch_continental_usa.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_vermoegensklassen.csv
wget -nc https://github.com/lukasmartinelli/pgfutter/releases/download/v0.1-alpha/csv_sample_metadatenbank.csv
cd $CWD
}

download_csv_samples
download_json_samples
download_csv_samples
37 changes: 0 additions & 37 deletions json.go
@@ -6,7 +6,6 @@ import (
"errors"
"fmt"
"io"
"io/ioutil"
"os"
)

@@ -67,42 +66,6 @@ func copyJSONRows(i *Import, reader *bufio.Reader, ignoreErrors bool) (error, in
return nil, success, failed
}

func importJSONObject(filename string, connStr string, schema string, tableName string, dataType string) error {
db, err := connect(connStr, schema)
if err != nil {
return err
}
defer db.Close()

// The entire file is read into memory because we need to add
// it into the PostgreSQL transaction, this will hit memory limits
// for big JSON objects
var bytes []byte
if filename == "" {
bytes, err = ioutil.ReadAll(os.Stdin)
} else {
bytes, err = ioutil.ReadFile(filename)
}
if err != nil {
return err
}

i, err := NewJSONImport(db, schema, tableName, "data", dataType)
if err != nil {
return err
}

// The JSON file is not validated at client side
// it is just copied into the database
// If the JSON file is corrupt PostgreSQL will complain when querying
err = i.AddRow(string(bytes))
if err != nil {
return err
}

return i.Commit()
}

func importJSON(filename string, connStr string, schema string, tableName string, ignoreErrors bool, dataType string) error {

db, err := connect(connStr, schema)
26 changes: 6 additions & 20 deletions pgfutter.go
@@ -1,7 +1,6 @@
package main

import (
"fmt"
"log"
"os"
"path/filepath"
@@ -121,27 +120,14 @@ func main() {
return err
},
},
{
Name: "jsonobj",
Usage: "Import single JSON object into database",
Action: func(c *cli.Context) error {
cli.CommandHelpTemplate = strings.Replace(cli.CommandHelpTemplate, "[arguments...]", "<json-file>", -1)

filename := c.Args().First()

schema := c.GlobalString("schema")
tableName := parseTableName(c, filename)
dataType := getDataType(c)

connStr := parseConnStr(c)
err := importJSONObject(filename, connStr, schema, tableName, dataType)
return err
},
},
{
Name: "csv",
Usage: "Import CSV into database",
Flags: []cli.Flag{
cli.BoolFlag{
Name: "excel",
Usage: "support problematic Excel 2008 and Excel 2011 csv line endings",
},
cli.BoolFlag{
Name: "skip-header",
Usage: "skip header row",
@@ -172,10 +158,10 @@ func main() {
skipHeader := c.Bool("skip-header")
fields := c.String("fields")
skipParseheader := c.Bool("skip-parse-delimiter")
excel := c.Bool("excel")
delimiter := parseDelimiter(c.String("delimiter"), skipParseheader)
fmt.Println(delimiter)
connStr := parseConnStr(c)
err := importCSV(filename, connStr, schema, tableName, ignoreErrors, skipHeader, fields, delimiter)
err := importCSV(filename, connStr, schema, tableName, ignoreErrors, skipHeader, fields, delimiter, excel)
return err
},
},
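With the new `--excel` flag threaded through to importCSV, a hypothetical invocation for a file exported from Excel 2008/2011 on a Mac might look like the sketch below. The file name is made up, and the table name would be derived from it by parseTableName as in the command handlers above.

```bash
pgfutter csv --excel sales_mac_export.csv
```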