diff --git a/api/testdata/api.snapshot b/api/testdata/api.snapshot index fc1e603d4..e603ddb2d 100755 Binary files a/api/testdata/api.snapshot and b/api/testdata/api.snapshot differ diff --git a/base/dsfs/dataset.go b/base/dsfs/dataset.go index c13b9258f..adced1cca 100644 --- a/base/dsfs/dataset.go +++ b/base/dsfs/dataset.go @@ -475,6 +475,8 @@ func generateCommitDescriptions(prev, ds *dataset.Dataset, force bool) (short, l return "created dataset", "created dataset", nil } + ctx := context.TODO() + // TODO(dlong): Inline body if it is a reasonable size, in order to get information about // how the body has changed. // TODO(dlong): Also should ignore derived fields, like structure.{checksum,entries,length}. @@ -491,13 +493,12 @@ func generateCommitDescriptions(prev, ds *dataset.Dataset, force bool) (short, l return "", "", err } - stat := deepdiff.Stats{} - diff, err := deepdiff.Diff(prevData, nextData, deepdiff.OptionSetStats(&stat)) + diff, stat, err := deepdiff.New().StatDiff(ctx, prevData, nextData) if err != nil { return "", "", err } - shortTitle, longMessage := friendly.DiffDescriptions(diff, &stat) + shortTitle, longMessage := friendly.DiffDescriptions(diff, stat) if shortTitle == "" { if force { return "forced update", "forced update", nil diff --git a/base/dsfs/dataset_test.go b/base/dsfs/dataset_test.go index 14b18b93e..c1443c99a 100644 --- a/base/dsfs/dataset_test.go +++ b/base/dsfs/dataset_test.go @@ -479,37 +479,23 @@ func TestGenerateCommitMessage(t *testing.T) { &dataset.Dataset{ Structure: &dataset.Structure{Format: "json"}, Body: toqtype.MustParseJSONAsArray(`[ - { - "fruit": "apple", "color": "red" - }, - { - "fruit": "banana", "color": "yellow" - }, - { - "fruit": "cherry", "color": "red" - } + { "fruit": "apple", "color": "red" }, + { "fruit": "banana", "color": "yellow" }, + { "fruit": "cherry", "color": "red" } ]`), }, &dataset.Dataset{ Structure: &dataset.Structure{Format: "json"}, Body: toqtype.MustParseJSONAsArray(`[ - { - "fruit": "apple", "color": "red" - }, - { - "fruit": "blueberry", "color": "blue" - }, - { - "fruit": "cherry", "color": "red" - }, - { - "fruit": "durian", "color": "green" - } + { "fruit": "apple", "color": "red" }, + { "fruit": "blueberry", "color": "blue" }, + { "fruit": "cherry", "color": "red" }, + { "fruit": "durian", "color": "green" } ]`), }, false, - "body replaced row 1 and added row 3", - "body:\n\treplaced row 1\n\tadded row 3", + "body updated row 1 and added row 3", + "body:\n\tupdated row 1\n\tadded row 3", }, { "body with lots of changes", @@ -665,7 +651,7 @@ hen,twenty-nine,30`), } for _, c := range goodCases { - t.Run(fmt.Sprintf("%s", c.description), func(t *testing.T) { + t.Run(c.description, func(t *testing.T) { shortTitle, longMessage, err := generateCommitDescriptions(c.prev, c.ds, c.force) if err != nil { t.Errorf("error: %s", err.Error()) diff --git a/base/friendly/friendly.go b/base/friendly/friendly.go index 1d3fc82a8..f27baf518 100644 --- a/base/friendly/friendly.go +++ b/base/friendly/friendly.go @@ -8,7 +8,7 @@ import ( "github.com/qri-io/deepdiff" ) -var log = logger.Logger("dsfs") +var log = logger.Logger("friendly") const smallNumberOfChangesToBody = 3 @@ -25,7 +25,7 @@ func DiffDescriptions(deltas []*deepdiff.Delta, stats *deepdiff.Stats) (string, return "", "" } - deltas = preprocess(deltas) + deltas = preprocess(deltas, "") perComponentChanges := buildComponentChanges(deltas) // Data accumulated while iterating over the components. @@ -68,6 +68,14 @@ func DiffDescriptions(deltas []*deepdiff.Delta, stats *deepdiff.Stats) (string, // it for both the long message and short title. msg = fmt.Sprintf("%s:\n\t%s", compName, changes.Rows[0]) shortTitle = fmt.Sprintf("%s %s", compName, changes.Rows[0]) + } else if len(changes.Rows) == 0 && compName == "transform" { + // TODO (b5) - this is a hack to make TestSaveTransformWithoutChanges + // in the cmd package pass. We're getting to this stage with 0 rows of + // changes, which is making msg & title not-empty, which is in turn allowing + // a commit b/c it looks like a change. ideally we don't make it here at all, + // but we DEFINITELY need a better hueristic for dsfs.CreateDataset's + // change detection check. Maybe use diffstat? + continue } else { // If there were multiple changes, describe them all for the long message // but just show the number of changes for the short title. @@ -107,38 +115,46 @@ func DiffDescriptions(deltas []*deepdiff.Delta, stats *deepdiff.Stats) (string, return shortTitle, longMessage } -// preprocess makes delta lists easier to work with, by combining operations when possible -func preprocess(deltas []*deepdiff.Delta) []*deepdiff.Delta { +const dtReplace = deepdiff.Operation("replace") + +// preprocess makes delta lists easier to work with, by combining operations +// when possible & removing unwanted paths +func preprocess(deltas deepdiff.Deltas, path string) deepdiff.Deltas { build := make([]*deepdiff.Delta, 0, len(deltas)) for i, d := range deltas { if i > 0 { last := build[len(build)-1] - if last.Path == d.Path { + if last.Path.String() == d.Path.String() { if last.Type == deepdiff.DTDelete && d.Type == deepdiff.DTInsert { - last.Type = "replace" + last.Type = dtReplace continue } } } + + p := joinPath(path, d.Path.String()) + // TODO (b5) - We need this because it's possible to write a transform script + // that changes nothing other than the script itself, and we currently reject + // that change. Should we? I think I'm missing something. + if p == "transform.scriptPath" { + continue + } + build = append(build, d) + if len(d.Deltas) > 0 { + d.Deltas = preprocess(d.Deltas, joinPath(path, d.Path.String())) + } } return build } -func buildComponentChanges(deltas []*deepdiff.Delta) map[string]*ComponentChanges { +func buildComponentChanges(deltas deepdiff.Deltas) map[string]*ComponentChanges { perComponentChanges := make(map[string]*ComponentChanges) for _, d := range deltas { - if d.Path == "/transform/scriptPath" { - continue - } - parts := strings.Split(d.Path, "/") - if len(parts) < 2 { - log.Debugf("path %q cannot map to dataset delta", d.Path) - continue - } else if len(parts) == 2 { + compName := d.Path.String() + if d.Type != deepdiff.DTContext { // Entire component changed - compName := parts[1] - if d.Type == deepdiff.DTInsert || d.Type == deepdiff.DTDelete { + if d.Type == deepdiff.DTInsert || d.Type == deepdiff.DTDelete || d.Type == dtReplace { if _, ok := perComponentChanges[compName]; !ok { perComponentChanges[compName] = &ComponentChanges{} } @@ -149,43 +165,67 @@ func buildComponentChanges(deltas []*deepdiff.Delta) map[string]*ComponentChange log.Debugf("unknown delta type %q for path %q", d.Type, d.Path) continue } - } else { + } else if len(d.Deltas) > 0 { // Part of the component changed, record some state to build into a message later - compName := parts[1] - if _, ok := perComponentChanges[compName]; !ok { - perComponentChanges[compName] = &ComponentChanges{} - } - changes, _ := perComponentChanges[compName] + changes := &ComponentChanges{} + if compName == "body" { - changes.Num++ - if changes.Num <= smallNumberOfChangesToBody { - rowNum := parts[2] - rowModify := fmt.Sprintf("%s row %s", pastTense(string(d.Type)), rowNum) - changes.Rows = append(changes.Rows, rowModify) - } else { - changes.Rows = nil - } + buildBodyChanges(changes, "", d.Deltas) } else { - rowModify := fmt.Sprintf("%s %s", pastTense(string(d.Type)), - strings.Join(parts[2:], ".")) - changes.Rows = append(changes.Rows, rowModify) + buildChanges(changes, "", d.Deltas) } + + perComponentChanges[compName] = changes } } return perComponentChanges } +func buildChanges(changes *ComponentChanges, parentPath string, deltas deepdiff.Deltas) { + for _, d := range deltas { + if d.Type != deepdiff.DTContext { + rowModify := fmt.Sprintf("%s %s", pastTense(string(d.Type)), joinPath(parentPath, d.Path.String())) + changes.Rows = append(changes.Rows, rowModify) + } + + if len(d.Deltas) > 0 { + buildChanges(changes, joinPath(parentPath, d.Path.String()), d.Deltas) + } + } +} + +func buildBodyChanges(changes *ComponentChanges, parentPath string, deltas deepdiff.Deltas) { + for _, d := range deltas { + if d.Type == deepdiff.DTDelete || d.Type == deepdiff.DTInsert || d.Type == deepdiff.DTUpdate || d.Type == dtReplace { + changes.Num++ + if changes.Num <= smallNumberOfChangesToBody { + rowModify := fmt.Sprintf("%s row %s", pastTense(string(d.Type)), joinPath(parentPath, d.Path.String())) + changes.Rows = append(changes.Rows, rowModify) + } else { + changes.Rows = nil + } + } else if len(d.Deltas) > 0 { + buildBodyChanges(changes, joinPath(parentPath, d.Path.String()), d.Deltas) + } + } +} + +func joinPath(parent, element string) string { + if parent == "" { + return element + } + return fmt.Sprintf("%s.%s", parent, element) +} + func pastTense(text string) string { if text == string(deepdiff.DTDelete) { return "removed" } else if text == string(deepdiff.DTInsert) { return "added" - } else if text == string(deepdiff.DTMove) { - return "moved" } else if text == string(deepdiff.DTUpdate) { return "updated" } else if text == "replace" { - return "replaced" + return "updated" } return text } diff --git a/base/friendly/friendly_test.go b/base/friendly/friendly_test.go index 2f0061ecb..c3296ce89 100644 --- a/base/friendly/friendly_test.go +++ b/base/friendly/friendly_test.go @@ -10,21 +10,17 @@ import ( func TestFriendlyDiffDescriptions(t *testing.T) { // Change both the meta and structure - deltas := []*deepdiff.Delta{ - &deepdiff.Delta{ - Type: deepdiff.DTUpdate, - Path: "/meta/title", - Value: "def", - SourcePath: "/meta/title", - SourceValue: "abc", - }, - &deepdiff.Delta{ - Type: deepdiff.DTUpdate, - Path: "/structure/formatConfig/headerRow", - Value: true, - SourcePath: "/structure/formatConfig/headerRow", - SourceValue: false, - }, + deltas := deepdiff.Deltas{ + {Type: deepdiff.DTContext, Path: deepdiff.StringAddr("meta"), Deltas: deepdiff.Deltas{ + {Type: deepdiff.DTDelete, Path: deepdiff.StringAddr("title"), Value: "abc"}, + {Type: deepdiff.DTInsert, Path: deepdiff.StringAddr("title"), Value: "def"}, + }}, + {Type: deepdiff.DTContext, Path: deepdiff.StringAddr("structure"), Deltas: deepdiff.Deltas{ + {Type: deepdiff.DTContext, Path: deepdiff.StringAddr("formatConfig"), Deltas: deepdiff.Deltas{ + {Type: deepdiff.DTDelete, Path: deepdiff.StringAddr("headerRow"), Value: false}, + {Type: deepdiff.DTInsert, Path: deepdiff.StringAddr("headerRow"), Value: true}, + }}, + }}, } stats := deepdiff.Stats{ Left: 46, @@ -48,11 +44,11 @@ func TestBuildComponentChanges(t *testing.T) { // Change the meta.title deltas := []*deepdiff.Delta{ &deepdiff.Delta{ - Type: deepdiff.DTUpdate, - Path: "/meta/title", - Value: "def", - SourcePath: "/meta/title", - SourceValue: "abc", + Type: deepdiff.DTContext, + Path: deepdiff.StringAddr("meta"), + Deltas: deepdiff.Deltas{ + {Type: deepdiff.DTUpdate, Path: deepdiff.StringAddr("title"), Value: "def", SourceValue: "abc"}, + }, }, } m := buildComponentChanges(deltas) @@ -69,14 +65,12 @@ func TestBuildComponentChanges(t *testing.T) { } // Change the structure - deltas = []*deepdiff.Delta{ - &deepdiff.Delta{ - Type: deepdiff.DTUpdate, - Path: "/structure/formatConfig/headerRow", - Value: true, - SourcePath: "/structure/formatConfig/headerRow", - SourceValue: false, - }, + deltas = deepdiff.Deltas{ + {Type: deepdiff.DTContext, Path: deepdiff.StringAddr("structure"), Deltas: deepdiff.Deltas{ + {Type: deepdiff.DTContext, Path: deepdiff.StringAddr("formatConfig"), Deltas: deepdiff.Deltas{ + {Type: deepdiff.DTUpdate, Path: deepdiff.StringAddr("headerRow"), Value: true, SourceValue: false}, + }}, + }}, } m = buildComponentChanges(deltas) keys = getKeys(m) @@ -92,21 +86,15 @@ func TestBuildComponentChanges(t *testing.T) { } // Change both the meta and structure - deltas = []*deepdiff.Delta{ - &deepdiff.Delta{ - Type: deepdiff.DTUpdate, - Path: "/meta/title", - Value: "def", - SourcePath: "/meta/title", - SourceValue: "abc", - }, - &deepdiff.Delta{ - Type: deepdiff.DTUpdate, - Path: "/structure/formatConfig/headerRow", - Value: true, - SourcePath: "/structure/formatConfig/headerRow", - SourceValue: false, - }, + deltas = deepdiff.Deltas{ + {Type: deepdiff.DTContext, Path: deepdiff.StringAddr("meta"), Deltas: deepdiff.Deltas{ + {Type: deepdiff.DTUpdate, Path: deepdiff.StringAddr("title"), Value: "def", SourceValue: "abc"}, + }}, + {Type: deepdiff.DTContext, Path: deepdiff.StringAddr("structure"), Deltas: deepdiff.Deltas{ + {Type: deepdiff.DTContext, Path: deepdiff.StringAddr("formatConfig"), Deltas: deepdiff.Deltas{ + {Type: deepdiff.DTUpdate, Path: deepdiff.StringAddr("headerRow"), Value: true, SourceValue: false}, + }}, + }}, } m = buildComponentChanges(deltas) keys = getKeys(m) diff --git a/cmd/cmd_test.go b/cmd/cmd_test.go index 382b49efa..b787aef77 100644 --- a/cmd/cmd_test.go +++ b/cmd/cmd_test.go @@ -248,7 +248,7 @@ func TestSaveTwoComponents(t *testing.T) { // This dataset is ds_ten.yaml, with the meta replaced by meta_override ("different title") and // the structure replaced by structure_override (lazyQuotes: false && title: "name"). - expect := `{"bodyPath":"/ipfs/QmXhsUK6vGZrqarhw9Z8RCXqhmEpvtVByKtaYVarbDZ5zn","commit":{"author":{"id":"QmeL2mdVka1eahKENjehK6tBxkkpk5dNQ1qMcgWi7Hrb4B"},"message":"meta:\n\tupdated title\nstructure:\n\tremoved schema.items.type\n\tupdated formatConfig.lazyQuotes\n\tupdated schema.items.items.0.title","path":"/ipfs/QmSktsfiF2KG8APBVuqDntSMn34xHvXvG4rQ1tdRaXHboN","qri":"cm:0","signature":"njCFxpGqq0xJSrjgxC289KncjflqA0e00txweEqIyUTvEKSUBKHcfQmx4OQIJzJqQJdcjIEzFrwP9cdquozRgsnrpsSfKb+wBWdtbnrg8zfat0X/Dqjro6JD7afJf0gU9s5SDi/s8g/qZOLwWh1nuoH4UAeUX+l3DH0ocFjeD6r/YkMJ0KXaWaFloKP8UPasfqoei9PxxmYQuAnFMqpXFisB7mKFAbgbpF3eL80UcbQPTih7WF11SBym/AzJhGNvOivOjmRxKGEuqEH9g3NPTEQr+LnP415X4qiaZA6MVmOO66vC0diUN4vJUMvhTsWnVEBtgqjTRYlSaYwabHv/gA==","timestamp":"2001-01-01T01:02:01.000000001Z","title":"updated meta and structure"},"meta":{"qri":"md:0","title":"different title"},"path":"/ipfs/QmbeXDJqjurBe9b2jEm6uKMqxZnBLCM9V3LpVS6Q6pvNs2","peername":"me","previousPath":"/ipfs/QmVdDACqmUoFGCotChqSuYJMnocPwkXPifEB6kGqiTjhiL","qri":"ds:0","structure":{"checksum":"QmcXDEGeWdyzfFRYyPsQVab5qszZfKqxTMEoXRDSZMyrhf","depth":2,"errCount":1,"entries":8,"format":"csv","formatConfig":{"headerRow":true,"lazyQuotes":false},"length":224,"qri":"st:0","schema":{"items":{"items":[{"title":"name","type":"string"},{"title":"duration","type":"integer"}]},"type":"array"}},"viz":{"format":"html","qri":"vz:0","renderedPath":"/ipfs/QmXkN5J5yCAtF8GCxwRXARzAQhj3bPaSv1VHoyCCXzQRzN","scriptPath":"/ipfs/QmVM37PFzBcZn3qqKvyQ9rJ1jC8NkS8kYZNJke1Wje1jor"}}` + expect := `{"bodyPath":"/ipfs/QmXhsUK6vGZrqarhw9Z8RCXqhmEpvtVByKtaYVarbDZ5zn","commit":{"author":{"id":"QmeL2mdVka1eahKENjehK6tBxkkpk5dNQ1qMcgWi7Hrb4B"},"message":"meta:\n\tupdated title\nstructure:\n\tupdated formatConfig.lazyQuotes\n\tupdated schema.items.items.0.title","path":"/ipfs/Qmf51CD3zW64ffoWja32bKh3BSyMwvMSbh9A8PtrA7fDJi","qri":"cm:0","signature":"njCFxpGqq0xJSrjgxC289KncjflqA0e00txweEqIyUTvEKSUBKHcfQmx4OQIJzJqQJdcjIEzFrwP9cdquozRgsnrpsSfKb+wBWdtbnrg8zfat0X/Dqjro6JD7afJf0gU9s5SDi/s8g/qZOLwWh1nuoH4UAeUX+l3DH0ocFjeD6r/YkMJ0KXaWaFloKP8UPasfqoei9PxxmYQuAnFMqpXFisB7mKFAbgbpF3eL80UcbQPTih7WF11SBym/AzJhGNvOivOjmRxKGEuqEH9g3NPTEQr+LnP415X4qiaZA6MVmOO66vC0diUN4vJUMvhTsWnVEBtgqjTRYlSaYwabHv/gA==","timestamp":"2001-01-01T01:02:01.000000001Z","title":"updated meta and structure"},"meta":{"qri":"md:0","title":"different title"},"path":"/ipfs/QmcSJnaS6xwcJXMKS3SPbyCnvorTefpMeMMJeXyMfxtTq8","peername":"me","previousPath":"/ipfs/QmVdDACqmUoFGCotChqSuYJMnocPwkXPifEB6kGqiTjhiL","qri":"ds:0","structure":{"checksum":"QmcXDEGeWdyzfFRYyPsQVab5qszZfKqxTMEoXRDSZMyrhf","depth":2,"errCount":1,"entries":8,"format":"csv","formatConfig":{"headerRow":true,"lazyQuotes":false},"length":224,"qri":"st:0","schema":{"items":{"items":[{"title":"name","type":"string"},{"title":"duration","type":"integer"}]},"type":"array"}},"viz":{"format":"html","qri":"vz:0","renderedPath":"/ipfs/QmXkN5J5yCAtF8GCxwRXARzAQhj3bPaSv1VHoyCCXzQRzN","scriptPath":"/ipfs/QmVM37PFzBcZn3qqKvyQ9rJ1jC8NkS8kYZNJke1Wje1jor"}}` if diff := cmp.Diff(expect, actual); diff != "" { t.Errorf("dataset (-want +got):\n%s", diff) } @@ -441,21 +441,39 @@ func TestDiffRevisions(t *testing.T) { run.MustExec(t, "qri save --body=testdata/movies/body_thirty.csv me/test_movies") output := run.MustExec(t, "qri diff body me/test_movies") - expect := `+30 elements. 30 inserts. 0 deletes. 0 updates. - -+ 18: ["Dragonfly ",104] -+ 19: ["The Black Dahlia ",121] -+ 20: ["Flyboys ",140] -+ 21: ["The Last Castle ",131] -+ 22: ["Supernova ",91] -+ 23: ["Winter's Tale ",118] -+ 24: ["The Mortal Instruments: City of Bones ",130] -+ 25: ["Meet Dave ",90] -+ 26: ["Dark Water ",103] -+ 27: ["Edtv ",122] + expect := `+30 elements. 10 inserts. 0 deletes. + + 0: ["Avatar ",178] + 1: ["Pirates of the Caribbean: At World's End ",169] + 2: ["Spectre ",148] + 3: ["The Dark Knight Rises ",164] + 4: ["Star Wars: Episode VII - The Force Awakens ",""] + 5: ["John Carter ",132] + 6: ["Spider-Man 3 ",156] + 7: ["Tangled ",100] + 8: ["Avengers: Age of Ultron ",141] + 9: ["Harry Potter and the Half-Blood Prince ",153] + 10: ["Batman v Superman: Dawn of Justice ",183] + 11: ["Superman Returns ",169] + 12: ["Quantum of Solace ",106] + 13: ["Pirates of the Caribbean: Dead Man's Chest ",151] + 14: ["The Lone Ranger ",150] + 15: ["Man of Steel ",143] + 16: ["The Chronicles of Narnia: Prince Caspian ",150] + 17: ["The Avengers ",173] ++18: ["Dragonfly ",104] ++19: ["The Black Dahlia ",121] ++20: ["Flyboys ",140] ++21: ["The Last Castle ",131] ++22: ["Supernova ",91] ++23: ["Winter's Tale ",118] ++24: ["The Mortal Instruments: City of Bones ",130] ++25: ["Meet Dave ",90] ++26: ["Dark Water ",103] ++27: ["Edtv ",122] ` - if output != expect { - t.Errorf("error, did not match actual:\n\"%v\"\nexpect:\n\"%v\"\n", output, expect) + if diff := cmp.Diff(expect, output); diff != "" { + t.Errorf("output mismatch (-want +got):\n%s", diff) } } diff --git a/cmd/diff.go b/cmd/diff.go index f8731b057..61cf61eba 100644 --- a/cmd/diff.go +++ b/cmd/diff.go @@ -132,7 +132,7 @@ func (o *DiffOptions) Run() (err error) { } if o.Format == "json" { - json.NewEncoder(o.Out).Encode(res.Diff) + json.NewEncoder(o.Out).Encode(res) return } diff --git a/cmd/diff_test.go b/cmd/diff_test.go index 38d96a3f2..ff2ef0b44 100644 --- a/cmd/diff_test.go +++ b/cmd/diff_test.go @@ -4,6 +4,7 @@ import ( "testing" "time" + "github.com/google/go-cmp/cmp" "github.com/qri-io/ioes" "github.com/qri-io/qri/base/dsfs" ) @@ -75,7 +76,12 @@ func TestDiffRun(t *testing.T) { Refs: NewListOfRefSelects([]string{"me/movies", "me/cities"}), Selector: "meta", }, - "0 elements. 0 inserts. 0 deletes. 1 update.\n\n~ title: \"example city data\"\n", + `0 elements. 1 insert. 1 delete. + + qri: "md:0" +-title: "example movie data" ++title: "example city data" +`, }, {"diff json output", &DiffOptions{ @@ -83,34 +89,32 @@ func TestDiffRun(t *testing.T) { Selector: "meta", Format: "json", }, - `[{"type":"update","path":"/title","value":"example city data","originalValue":"example movie data"}] + `{"stat":{"leftNodes":3,"rightNodes":3,"leftWeight":45,"rightWeight":43,"inserts":1,"deletes":1},"diff":[[" ","qri","md:0"],["-","title","example movie data"],["+","title","example city data"]]} `, }, } for _, c := range good { - dsr, err := f.DatasetRequests() - if err != nil { - t.Errorf("case %s, error creating dataset request: %s", c.description, err) - continue - } + t.Run(c.description, func(t *testing.T) { + dsr, err := f.DatasetRequests() + if err != nil { + t.Fatalf("case %s, error creating dataset request: %s", c.description, err) + } - opt := c.opt - opt.IOStreams = streams - opt.DatasetRequests = dsr + opt := c.opt + opt.IOStreams = streams + opt.DatasetRequests = dsr - if err = opt.Run(); err != nil { - t.Errorf("case %s unexpected error: %s", c.description, err) - ioReset(in, out, errs) - continue - } + if err = opt.Run(); err != nil { + t.Fatalf("case %s unexpected error: %s", c.description, err) + } + + if diff := cmp.Diff(out.String(), c.stdout); diff != "" { + t.Errorf("output mismatch (-want +got):\n%s", diff) + } - if c.stdout != out.String() { - t.Errorf("case %s, output mismatch. Expected: '%s', Got: '%s'", c.description, c.stdout, out.String()) ioReset(in, out, errs) - continue - } - ioReset(in, out, errs) + }) } bad := []struct { diff --git a/cmd/fsi_integration_test.go b/cmd/fsi_integration_test.go index 1fd05a1bd..f1c1169cf 100644 --- a/cmd/fsi_integration_test.go +++ b/cmd/fsi_integration_test.go @@ -1029,17 +1029,26 @@ run ` + "`qri save`" + ` to commit this dataset output = run.MustExec(t, "qri diff") expect = `for linked dataset [test_peer/diff_change] -+1 element. 1 insert. 0 deletes. 4 updates. - -body: - 0: - ~ 0: "lucky" - ~ 1: "number" - ~ 2: 17 - 1: - ~ 2: 321 -meta: - + title: "hello" ++1 element. 5 inserts. 4 deletes. + + body: + 0: + -0: "one" + +0: "lucky" + -1: "two" + +1: "number" + -2: 3 + +2: 17 + 1: + 0: "four" + 1: "five" + -2: 6 + +2: 321 + meta: + qri: "md:0" + +title: "hello" + qri: "ds:0" + structure: {"format":"csv","formatConfig":{"lazyQuotes":true},"qri":"st:0","schema":{"items":{"items":[{"title":"field_1","type":"string"},{"title":"field_2","type":"string"},{"title":"field_3","type":"integer"}],"type":"array"},"type":"array"}} ` if diff := cmpTextLines(expect, output); diff != "" { t.Errorf("qri status (-want +got):\n%s", diff) diff --git a/cmd/print.go b/cmd/print.go index 9fe79dab2..b550b15e1 100644 --- a/cmd/print.go +++ b/cmd/print.go @@ -167,26 +167,17 @@ func doesCommandExist(cmdName string) bool { } func printDiff(w io.Writer, res *lib.DiffResponse, summaryOnly bool) (err error) { - var stats, text string + buf := &bytes.Buffer{} // TODO (b5): this reading from a package variable is pretty hacky :/ - if color.NoColor { - stats = deepdiff.FormatPrettyStats(res.Stat) - if !summaryOnly { - text, err = deepdiff.FormatPretty(res.Diff) - if err != nil { - return err - } - } - } else { - stats = deepdiff.FormatPrettyStatsColor(res.Stat) - if !summaryOnly { - text, err = deepdiff.FormatPrettyColor(res.Diff) - if err != nil { - return err - } + // should use the IsATTY package from mattn + deepdiff.FormatPrettyStats(buf, res.Stat, !color.NoColor) + if !summaryOnly { + buf.WriteByte('\n') + if err = deepdiff.FormatPretty(buf, res.Diff, !color.NoColor); err != nil { + return err } } - buf := bytes.NewBuffer([]byte(stats + "\n" + text)) + printToPager(w, buf) return nil } diff --git a/go.mod b/go.mod index 7492e14d5..4bdce1b2a 100644 --- a/go.mod +++ b/go.mod @@ -47,7 +47,7 @@ require ( github.com/qri-io/apiutil v0.1.0 github.com/qri-io/dag v0.2.1-0.20191025201336-254aa177fbd7 github.com/qri-io/dataset v0.1.5-0.20191126212116-72b5aa69790b - github.com/qri-io/deepdiff v0.1.1-0.20191118194347-ac31500b9917 + github.com/qri-io/deepdiff v0.1.1-0.20200305020550-8173efebcaa1 github.com/qri-io/doggos v0.1.0 github.com/qri-io/ioes v0.1.0 github.com/qri-io/iso8601 v0.1.0 diff --git a/go.sum b/go.sum index df5a47b05..7f46d45aa 100644 --- a/go.sum +++ b/go.sum @@ -873,6 +873,13 @@ github.com/qri-io/dataset v0.1.5-0.20191126212116-72b5aa69790b h1:I1/2Vb0uHkaz4H github.com/qri-io/dataset v0.1.5-0.20191126212116-72b5aa69790b/go.mod h1:efJPKDyyGItDnI9CzKuQYmKiEP0xp4mIxIut7o/bG2g= github.com/qri-io/deepdiff v0.1.1-0.20191118194347-ac31500b9917 h1:KR/lVzcP9SaxokGBcMggXCEu8F8q6JkRcZH02sLqQ9Y= github.com/qri-io/deepdiff v0.1.1-0.20191118194347-ac31500b9917/go.mod h1:/O1HQVAlm3yhgQvNwyzsKaECfM2vVSx6KxttgXQQGFw= +github.com/qri-io/deepdiff v0.1.1-0.20200213140139-1cdda9aff3e7 h1:QwBlFHyB5K9r1AQTAUINRakeqvIWGdap64dknXNhuc8= +github.com/qri-io/deepdiff v0.1.1-0.20200213140139-1cdda9aff3e7/go.mod h1:NrL/b7YvexgpGb4HEO3Rlx5RrMLDfxuKDf/XDAq5ac0= +github.com/qri-io/deepdiff v0.1.1-0.20200303185551-fef8c216601d/go.mod h1:NrL/b7YvexgpGb4HEO3Rlx5RrMLDfxuKDf/XDAq5ac0= +github.com/qri-io/deepdiff v0.1.1-0.20200304024845-339d0cac0e1b h1:oJrtzp9Mq9wYq5oI++el7iUQLa3oQelw+qp5hzgFHo4= +github.com/qri-io/deepdiff v0.1.1-0.20200304024845-339d0cac0e1b/go.mod h1:NrL/b7YvexgpGb4HEO3Rlx5RrMLDfxuKDf/XDAq5ac0= +github.com/qri-io/deepdiff v0.1.1-0.20200305020550-8173efebcaa1 h1:LR8poNGx/qkhcNU0eWmsPexQ69s/MK+kvFNfAhTTlK0= +github.com/qri-io/deepdiff v0.1.1-0.20200305020550-8173efebcaa1/go.mod h1:NrL/b7YvexgpGb4HEO3Rlx5RrMLDfxuKDf/XDAq5ac0= github.com/qri-io/doggos v0.1.0 h1:B7Hn9ssRGDAonMhJ4UwDtPDmG9GtvLR8f7VFec7Rs7M= github.com/qri-io/doggos v0.1.0/go.mod h1:FmyznoDwt2mhskYpnasyoEsxNr490qVbX9Sf6NP2Crg= github.com/qri-io/ioes v0.1.0 h1:MCkiJcBhxFS9DKrvtrUQ+o1IQtZFKYioMt12XauqUT0= diff --git a/lib/datasets_test.go b/lib/datasets_test.go index 1f7d372f9..5d77ffaae 100644 --- a/lib/datasets_test.go +++ b/lib/datasets_test.go @@ -184,7 +184,7 @@ func TestDatasetRequestsSaveRecall(t *testing.T) { FilePaths: []string{metaOnePath}, ReturnBody: true}, res) if err != nil { - t.Error(err.Error()) + t.Fatal(err.Error()) } err = r.Save(&SaveParams{ @@ -192,7 +192,7 @@ func TestDatasetRequestsSaveRecall(t *testing.T) { FilePaths: []string{metaOnePath}, Recall: "wut"}, res) if err == nil { - t.Error("expected bad recall to error") + t.Fatal("expected bad recall to error") } err = r.Save(&SaveParams{ @@ -200,7 +200,7 @@ func TestDatasetRequestsSaveRecall(t *testing.T) { FilePaths: []string{metaTwoPath}, Recall: "tf"}, res) if err != nil { - t.Error(err) + t.Fatal(err) } if res.Dataset.Transform == nil { t.Error("expected transform to exist on recalled save") diff --git a/lib/diff.go b/lib/diff.go index 4b8d8e42c..bb8d9a674 100644 --- a/lib/diff.go +++ b/lib/diff.go @@ -39,10 +39,10 @@ type DiffParams struct { // DiffResponse is the result of a call to diff type DiffResponse struct { - Stat *DiffStat `json:"stat,omitempty"` - Diff []*Delta `json:"diff,omitempty"` - A interface{} `json:"b,omitempty"` - B interface{} `json:"a,omitempty"` + Stat *DiffStat `json:"stat,omitempty"` + SchemaStat *DiffStat `json:"schemaStat,omitempty"` + Schema []*Delta `json:"schema,omitempty"` + Diff []*Delta `json:"diff,omitempty"` } // Diff computes the diff of two datasets @@ -80,10 +80,13 @@ func (r *DatasetRequests) Diff(p *DiffParams, res *DiffResponse) (err error) { return err } - res.Stat = &deepdiff.Stats{} - res.A = leftData - res.B = rightData - res.Diff, err = deepdiff.Diff(leftData, rightData, deepdiff.OptionSetStats(res.Stat)) + res.Schema, res.SchemaStat, err = schemaDiff(ctx, leftComp, rightComp) + if err != nil { + return err + } + + dd := deepdiff.New() + res.Diff, res.Stat, err = dd.StatDiff(ctx, leftData, rightData) return err } else if dsref.IsRefString(p.LeftPath) && p.RightPath == "" { // Left parameter with a blank right parameter needs either working directory or as-previous @@ -232,9 +235,46 @@ func (r *DatasetRequests) Diff(p *DiffParams, res *DiffResponse) (err error) { return err } - res.Stat = &deepdiff.Stats{} - res.A = leftData - res.B = rightData - res.Diff, err = deepdiff.Diff(leftData, rightData, deepdiff.OptionSetStats(res.Stat)) + dd := deepdiff.New() + res.Diff, res.Stat, err = dd.StatDiff(ctx, leftData, rightData) return err } + +func schemaDiff(ctx context.Context, left, right *component.BodyComponent) ([]*Delta, *DiffStat, error) { + dd := deepdiff.New() + if left.Format == ".csv" && right.Format == ".csv" { + left, err := terribleHackToGetHeaderRow(left.InferredSchema) + if err != nil { + return nil, nil, err + } + + right, err := terribleHackToGetHeaderRow(right.InferredSchema) + if err != nil { + return nil, nil, err + } + + return dd.StatDiff(ctx, left, right) + } + return dd.StatDiff(ctx, left.InferredSchema, right.InferredSchema) +} + +// TODO (b5) - this is terrible. We need better logic error handling for +// jsonschemas describing CSV data. We're relying too heavily on the schema +// being well-formed +func terribleHackToGetHeaderRow(sch map[string]interface{}) ([]string, error) { + if itemObj, ok := sch["items"].(map[string]interface{}); ok { + if itemArr, ok := itemObj["items"].([]interface{}); ok { + titles := make([]string, len(itemArr)) + for i, f := range itemArr { + if field, ok := f.(map[string]interface{}); ok { + if title, ok := field["title"].(string); ok { + titles[i] = title + } + } + } + return titles, nil + } + } + log.Debug("that terrible hack to detect header row & types just failed") + return nil, fmt.Errorf("nope") +} diff --git a/lib/diff_test.go b/lib/diff_test.go index c261746c3..f369e2c40 100644 --- a/lib/diff_test.go +++ b/lib/diff_test.go @@ -1,7 +1,6 @@ package lib import ( - "encoding/json" "testing" "github.com/google/go-cmp/cmp" @@ -47,31 +46,31 @@ func TestDatasetRequestsDiff(t *testing.T) { {"two fully qualified references", dsRef1.String(), dsRef2.String(), "", - &DiffStat{Left: 41, Right: 42, LeftWeight: 2548, RightWeight: 2770, Inserts: 0, Updates: 8, Deletes: 0, Moves: 0}, - 8, + &DiffStat{Left: 41, Right: 42, LeftWeight: 2548, RightWeight: 2770, Inserts: 8, Updates: 0, Deletes: 8}, + 9, }, {"fill left path from history", dsRef2.AliasString(), dsRef2.AliasString(), "", - &DiffStat{Left: 41, Right: 42, LeftWeight: 2548, RightWeight: 2770, Inserts: 0, Updates: 8, Deletes: 0, Moves: 0}, - 8, + &DiffStat{Left: 41, Right: 42, LeftWeight: 2548, RightWeight: 2770, Inserts: 8, Updates: 0, Deletes: 8}, + 9, }, {"two local file paths", "testdata/jobs_by_automation/body.csv", "testdata/jobs_by_automation_2/body.csv", "", - &DiffStat{Left: 151, Right: 151, LeftWeight: 3757, RightWeight: 3757, Inserts: 0, Updates: 1, Deletes: 0, Moves: 0}, - 1, + &DiffStat{Left: 151, Right: 151, LeftWeight: 3757, RightWeight: 3757, Inserts: 1, Updates: 0, Deletes: 1}, + 30, }, {"diff local csv & json file", "testdata/now_tf/input.dataset.json", "testdata/jobs_by_automation/body.csv", "", - &DiffStat{Left: 10, Right: 151, LeftWeight: 162, RightWeight: 3757, Inserts: 151, Updates: 0, Deletes: 151, Moves: 0}, + &DiffStat{Left: 10, Right: 151, LeftWeight: 162, RightWeight: 3757, Inserts: 1, Updates: 0, Deletes: 1}, 2, }, {"case-sensitive key change", djsOnePath, djsTwoPath, "", - &DiffStat{Left: 4, Right: 4, LeftWeight: 18, RightWeight: 18, Inserts: 3, Updates: 0, Deletes: 1, Moves: 0}, + &DiffStat{Left: 4, Right: 4, LeftWeight: 18, RightWeight: 18, Inserts: 1, Updates: 0, Deletes: 1}, 2, }, } @@ -99,8 +98,8 @@ func TestDatasetRequestsDiff(t *testing.T) { t.Errorf("result mismatch (-want +got):\n%s", diff) } - dlt, _ := json.MarshalIndent(res.Diff, "", " ") - t.Logf("%s", dlt) + // dlt, _ := json.MarshalIndent(res.Diff, "", " ") + // t.Logf("%s", dlt) if len(res.Diff) != c.DeltaLen { t.Errorf("%d: \"%s\" delta length mismatch. want: %d got: %d", i, c.description, c.DeltaLen, len(res.Diff)) diff --git a/logbook/logbook.go b/logbook/logbook.go index 157dd631f..920f64356 100644 --- a/logbook/logbook.go +++ b/logbook/logbook.go @@ -96,7 +96,7 @@ type Book struct { fsLocation string fs qfs.Filesystem - listener func(*Action) + listener func(*Action) } // NewBook creates a book with a user-provided logstore