diff --git a/README.md b/README.md index 1879ef3..cb526cc 100644 --- a/README.md +++ b/README.md @@ -99,3 +99,8 @@ this testing in the future. context of each fragment must exactly match the source file; `git apply` implements a search algorithm that tries different lines and amounts of context, with further options to normalize or ignore whitespace changes. + +7. Multi-parent differences (produced by `git --cc`) are supported with two + parents. The full specification for the difference allows for more than + two, but that is not completely supported. + diff --git a/gitdiff/combined.go b/gitdiff/combined.go new file mode 100644 index 0000000..ecba149 --- /dev/null +++ b/gitdiff/combined.go @@ -0,0 +1,324 @@ +package gitdiff + +import ( + "fmt" + "io" + "strconv" + "strings" +) + +// ParseCombinedTextFragments parses text fragments with 2 or more parents until the +// next file header or the end of the stream and attaches them to the given file. It +// returns the number of fragments that were added. +func (p *parser) ParseCombinedTextFragments(f *File) (n int, err error) { + for { + frags, err := p.ParseCombinedTextFragmentHeader() + if err != nil { + return n, err + } + if len(frags) <= 0 { + return n, nil + } + + for _, frag := range frags { + if f.IsNew && frag.OldLines > 0 { + return n, p.Errorf(-1, "new file depends on old contents") + } + if f.IsDelete && frag.NewLines > 0 { + return n, p.Errorf(-1, "deleted file still has contents") + } + } + + if err := p.ParseCombinedTextChunk(frags); err != nil { + return n, err + } + + f.TextFragments = append(f.TextFragments, frags...) + n += len(frags) + } +} + +func (p *parser) ParseCombinedTextFragmentHeader() ([]*TextFragment, error) { + // There are (number of parents + 1) @ characters in the chunk header for combined diff format. + // This implementation is generic enough to use for both the standard '@@ ' text diff and for + // the combined diff. However, for stability and performance reasons, they are split into + // different implementations. + const ( + parentMark = '@' + minStartMark = "@@@" + trailingStartMark = "@ -" + ) + line := p.Line(0) + + if !strings.HasPrefix(line, minStartMark) { + return nil, nil + } + + // Find wrapping markers around the range, and, in doing so, count the number of parent files. + startEnd := strings.Index(line, trailingStartMark) + if startEnd < 0 { + return nil, nil + } + parentCount := 0 + endMark := " @" + for ; parentCount < startEnd; parentCount++ { + // check for valid combined form marker. + if line[parentCount] != parentMark { + return nil, nil + } + endMark += line[parentCount : parentCount+1] + } + + // Split up the line into sections. + // Keep the leading '-' on the first range. + startPos := startEnd + len(trailingStartMark) - 1 + parts := strings.SplitAfterN(p.Line(0), endMark, 2) + if len(parts) < 2 { + return nil, p.Errorf(0, "invalid fragment header") + } + comment := strings.TrimSpace(parts[1]) + + // Collect the file ranges. + header := parts[0][startPos : len(parts[0])-len(endMark)] + ranges := strings.Split(header, " ") + if len(ranges) != parentCount+1 { + return nil, p.Errorf(0, "invalid fragment header") + } + frags, err := parseCombinedHeaderRanges(comment, ranges) + if err != nil { + return nil, p.Errorf(0, "invalid fragment header: %v", err) + } + + if err := p.Next(); err != nil && err != io.EOF { + return nil, err + } + return frags, nil +} + +func parseCombinedHeaderRanges(comment string, ranges []string) ([]*TextFragment, error) { + parentCount := len(ranges) - 1 + + // This needs to cope with a strange old-line range of '-1' that some versions + // of Git produce. When this happens, the old and new line counts must increment by 1. + var negativeCount int64 = 0 + + // Parse the merged range. + var err error + newPosition, newLines, err := parseCombinedRange(ranges[parentCount][1:]) + if err != nil { + return nil, err + } + if newLines < 0 { + negativeCount -= newLines + // The newLines count remains negative, so it adjusts to zero at the end. + } + + // Parse the parent file ranges. + frags := make([]*TextFragment, parentCount) + for i := 0; i < parentCount; i++ { + f := &TextFragment{ + Comment: comment, + NewPosition: newPosition, + NewLines: newLines, + } + if f.OldPosition, f.OldLines, err = parseCombinedRange(ranges[i][1:]); err != nil { + return nil, err + } + if f.OldLines < 0 { + negativeCount -= f.OldLines + // The OldLines count remains negative, so the final adjustment makes it zero. + } + frags[i] = f + } + + // Adjust each fragment count based on the negative count. + for _, f := range frags { + f.OldLines += negativeCount + f.NewLines += negativeCount + } + + return frags, nil +} + +func parseCombinedRange(s string) (start int64, end int64, err error) { + parts := strings.SplitN(s, ",", 2) + + if start, err = strconv.ParseInt(parts[0], 10, 64); err != nil { + nerr := err.(*strconv.NumError) + return 0, 0, fmt.Errorf("bad start of range: %s: %v", parts[0], nerr.Err) + } + + if len(parts) > 1 { + if parts[1] == "18446744073709551615" { + // There are some versions of "git diff --cc" that return a uint64 version of -1, + // which is this number. That seems to mean that, for this specific file, + // there wasn't any change. + // This is the only version of this large number that's been seen, though it's possible that + // the git diff --cc format can return other negative numbers. In those cases, more complex + // logic would be needed to convert the uint64 to signed int64. + end = -1 + } else if end, err = strconv.ParseInt(parts[1], 10, 64); err != nil { + nerr := err.(*strconv.NumError) + return 0, 0, fmt.Errorf("bad end of range: %s: %v", parts[1], nerr.Err) + } + } else { + end = 1 + } + + return +} + +func (p *parser) ParseCombinedTextChunk(frags []*TextFragment) error { + if p.Line(0) == "" { + return p.Errorf(0, "no content following fragment header") + } + parentCount := len(frags) + + // Track whether any line included an alteration. + noLineChanges := true + + // Only count leading and trailing context when it applies to all the files. + var leadingContext int64 = 0 + var trailingContext int64 = 0 + + // Pre-allocate the per-filter altered check. + // It's only used within the per-line, but it's always re-initialized on each pass. + altered := make([]bool, parentCount) + +lineLoop: + for { + line := p.Line(0) + // Should be able to count the lines required by the range header, + // however there are some rare times when that does not correctly align. + // Therefore, the 'text.go' version of line count checking isn't used here. + if !areValidOps(line, parentCount) { + break + } + + parentOps, data := line[0:parentCount], line[parentCount:] + + // Each character in parentOps is for each parent, to show how target file line + // differs from each file of the parents. If a fragment has a '-', then it is + // a removal. If another fragment has a '+' but this one has a ' ', then + // it's also a removal. + if parentOps == "\n" { + // newer GNU diff versions create empty context lines + data = "\n" + parentOps = "" + } + + hasAdd := false + hasRemove := false + hasContext := false + for idx, op := range parentOps { + frag := frags[idx] + altered[idx] = false + + switch op { + case ' ': + // Context lines + hasContext = true + frag.Lines = append(frag.Lines, Line{OpContext, data}) + // Adjustment of the leading and trailing context count can only happen + // by analyzing all the file operations, so that happens after the line's + // operation checks. + case '-': + hasRemove = true + altered[idx] = true + noLineChanges = false + frag.LinesDeleted++ + trailingContext = 0 + frag.Lines = append(frag.Lines, Line{OpDelete, data}) + case '+': + hasAdd = true + altered[idx] = true + noLineChanges = false + frag.LinesAdded++ + trailingContext = 0 + frag.Lines = append(frag.Lines, Line{OpAdd, data}) + case '\\': + // this may appear in middle of fragment if it's for a deleted line + if isNoNewlineMarker(line) { + removeLastNewline(frag) + // Move on to the next line. + continue lineLoop + } + fallthrough + default: + // TODO(bkeyes): if this is because we hit the next header, it + // would be helpful to return the miscounts line error. We could + // either test for the common headers ("@@ -", "diff --git", "@@@ -") or + // assume any invalid op ends the fragment; git returns the same + // generic error in all cases so either is compatible + return p.Errorf(0, "invalid line operation: %q", op) + } + } + + // The complex counting method. + + // Lines with removes reduce the old line count once per removal operation, and + // the counting happens during each file's removal action. + if !hasRemove && !hasAdd && hasContext { + // Lines with no removes, no adds, and had at least 1 context entry + // means that this line a full context - no add and no remove. + if noLineChanges { + leadingContext++ + } else { + trailingContext++ + } + } + + if err := p.Next(); err != nil { + if err == io.EOF { + break + } + return err + } + } + + // There is a rare, but valid, scenario where the line counts don't match up with + // what was parsed. This seems related to the "-1" range value. Because of that, + // this function can't validate the header line count against the lines encountered. + + if noLineChanges { + return p.Errorf(0, "fragment contains no changes") + } + + // Check for a final "no newline" marker since it is not included in the + // counters used to stop the loop above + if isNoNewlineMarker(p.Line(0)) { + for _, frag := range frags { + removeLastNewline(frag) + } + if err := p.Next(); err != nil && err != io.EOF { + return err + } + } + + // Because the leading and trailing context can only be determined on a whole line basis, + // and the value can change depending on later discoveries, this count only has meaning + // at the very end. + for _, frag := range frags { + frag.LeadingContext = leadingContext + frag.TrailingContext = trailingContext + } + + return nil +} + +func areValidOps(line string, count int) bool { + if len(line) < count { + // Generally, this happens with an empty line ('\n'), but could also be file corruption. + return false + } + for count > 0 { + count-- + c := line[count] + switch c { + case ' ', '+', '-', '\\': + default: + return false + } + } + return true +} diff --git a/gitdiff/combined_test.go b/gitdiff/combined_test.go new file mode 100644 index 0000000..a549583 --- /dev/null +++ b/gitdiff/combined_test.go @@ -0,0 +1,772 @@ +package gitdiff + +import ( + "io" + "reflect" + "testing" +) + +func TestParseCombinedTextFragmentHeader(t *testing.T) { + tests := map[string]struct { + Input string + Output []TextFragment + Err string + }{ + "shortest": { + Input: "@@@ -1 -1 +1 @@@\n", + Output: []TextFragment{{ + OldPosition: 1, + OldLines: 1, + NewPosition: 1, + NewLines: 1, + }, { + OldPosition: 1, + OldLines: 1, + NewPosition: 1, + NewLines: 1, + }}, + }, + "twoParent": { + Input: "@@@ -52,1 -50,2 +52,2 @@@\n", + Output: []TextFragment{{ + OldPosition: 52, + OldLines: 1, + NewPosition: 52, + NewLines: 2, + }, { + OldPosition: 50, + OldLines: 2, + NewPosition: 52, + NewLines: 2, + }}, + }, + "threeParent": { + Input: "@@@@ -52,1 -50,2 -38,5 +42,9 @@@@\n", + Output: []TextFragment{{ + OldPosition: 52, + OldLines: 1, + NewPosition: 42, + NewLines: 9, + }, { + OldPosition: 50, + OldLines: 2, + NewPosition: 42, + NewLines: 9, + }, { + OldPosition: 38, + OldLines: 5, + NewPosition: 42, + NewLines: 9, + }}, + }, + "trailingComment": { + Input: "@@@ -52,1 -50,2 +52,2 @@@ func test(n int) {\n", + Output: []TextFragment{{ + Comment: "func test(n int) {", + OldPosition: 52, + OldLines: 1, + NewPosition: 52, + NewLines: 2, + }, { + Comment: "func test(n int) {", + OldPosition: 50, + OldLines: 2, + NewPosition: 52, + NewLines: 2, + }}, + }, + "negativeCount2": { + Input: "@@@ -229,4 -229,18446744073709551615 +228,3 @@@ Comment\n", + Output: []TextFragment{{ + Comment: "Comment", + OldPosition: 229, + OldLines: 5, + NewPosition: 228, + NewLines: 4, + }, { + Comment: "Comment", + OldPosition: 229, + OldLines: 0, + NewPosition: 228, + NewLines: 4, + }}, + }, + "negativeCount3": { + Input: "@@@ -229,4 -229,1 +228,18446744073709551615 @@@ Comment\n", + Output: []TextFragment{{ + Comment: "Comment", + OldPosition: 229, + OldLines: 5, + NewPosition: 228, + NewLines: 0, + }, { + Comment: "Comment", + OldPosition: 229, + OldLines: 2, + NewPosition: 228, + NewLines: 0, + }}, + }, + "incompleteThree": { + Input: "@@@ -12,3 -5,9 +2\n", + Err: "gitdiff: line 1: invalid fragment header", + }, + "incompleteTwo": { + Input: "@@@ -12,3 +2,5\n", + Err: "gitdiff: line 1: invalid fragment header", + }, + "incompleteFour": { + Input: "@@@@ -12,3 -5,9 -2,6\n", + Err: "gitdiff: line 1: invalid fragment header", + }, + "incompleteFourDiffs": { + Input: "@@@@ -12,3 -5,9 -2,6 @@@@\n", + Err: "gitdiff: line 1: invalid fragment header", + }, + "badNumbers": { + Input: "@@@ -1a,2b -3c,4d +5e,6f @@@\n", + Err: "gitdiff: line 1: invalid fragment header: bad start of range: 5e: invalid syntax", + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + p := newTestParser(test.Input, true) + + frags, err := p.ParseCombinedTextFragmentHeader() + if test.Err != "" { + if err == nil || err == io.EOF { + t.Fatalf("expected error parsing header, but got %v", err) + } + if test.Err != err.Error() { + t.Errorf("incorrect error: expected %s actual %v", test.Err, err) + } + return + } + if err != nil { + t.Fatalf("error parsing header: %v", err) + } + + if !reflect.DeepEqual(joinFragmentPtr(test.Output), frags) { + t.Errorf("incorrect fragment\nexpected: %+v\nactual: %+v", test.Output, reduceFragmentPtr(frags)) + } + }) + } +} + +func TestCombinedParseTextChunk(t *testing.T) { + tests := map[string]struct { + Input string + Fragments []TextFragment + + Output []TextFragment + Err bool + }{ + "removeAddRemoveDAdd": { + Input: `- old line 1.0 ++ new line 1.1 + -old line 2.1 +++new line 1.2,2.2 +`, + Fragments: []TextFragment{ + {OldLines: 1, NewLines: 2}, + {OldLines: 2, NewLines: 2}, + }, + Output: []TextFragment{ + { + OldLines: 1, + NewLines: 2, + Lines: []Line{ + {OpDelete, "old line 1.0\n"}, + {OpAdd, "new line 1.1\n"}, + {OpContext, "old line 2.1\n"}, + {OpAdd, "new line 1.2,2.2\n"}, + }, + LinesAdded: 2, + LinesDeleted: 1, + LeadingContext: 0, + TrailingContext: 0, + }, + { + OldLines: 2, + NewLines: 2, + Lines: []Line{ + {OpContext, "old line 1.0\n"}, + {OpContext, "new line 1.1\n"}, + {OpDelete, "old line 2.1\n"}, + {OpAdd, "new line 1.2,2.2\n"}, + }, + LinesAdded: 1, + LinesDeleted: 1, + LeadingContext: 0, + TrailingContext: 0, + }, + }, + }, + "addRemoveAddRemove": { + Input: ` -remove line 1.1 + +add line 1.2 +- remove line 2.1 ++ add line 2.2 +`, + Fragments: []TextFragment{ + {OldLines: 2, NewLines: 2}, + {OldLines: 2, NewLines: 2}, + }, + Output: []TextFragment{ + { + OldLines: 2, + NewLines: 2, + Lines: []Line{ + {OpContext, "remove line 1.1\n"}, + {OpContext, "add line 1.2\n"}, + {OpDelete, "remove line 2.1\n"}, + {OpAdd, "add line 2.2\n"}, + }, + LinesAdded: 1, + LinesDeleted: 1, + LeadingContext: 0, + TrailingContext: 0, + }, + { + OldLines: 2, + NewLines: 2, + Lines: []Line{ + {OpDelete, "remove line 1.1\n"}, + {OpAdd, "add line 1.2\n"}, + {OpContext, "remove line 2.1\n"}, + {OpContext, "add line 2.2\n"}, + }, + LinesAdded: 1, + LinesDeleted: 1, + LeadingContext: 0, + TrailingContext: 0, + }, + }, + }, + "removeAdd3RemoveAdd": { + Input: `- remove line 1.1 ++ add line 1.2 + -remove line 2.1 + -remove line 2.2 + -remove line 2.3 + +add line 2.4 + `, + Fragments: []TextFragment{ + {OldLines: 2, NewLines: 2}, + {OldLines: 4, NewLines: 2}, + }, + Output: []TextFragment{ + { + OldLines: 2, + NewLines: 2, + Lines: []Line{ + {OpDelete, "remove line 1.1\n"}, + {OpAdd, "add line 1.2\n"}, + {OpContext, "remove line 2.1\n"}, + {OpContext, "remove line 2.2\n"}, + {OpContext, "remove line 2.3\n"}, + {OpContext, "add line 2.4\n"}, + }, + LinesAdded: 1, + LinesDeleted: 1, + LeadingContext: 0, + TrailingContext: 0, + }, + { + OldLines: 4, + NewLines: 2, + Lines: []Line{ + {OpContext, "remove line 1.1\n"}, + {OpContext, "add line 1.2\n"}, + {OpDelete, "remove line 2.1\n"}, + {OpDelete, "remove line 2.2\n"}, + {OpDelete, "remove line 2.3\n"}, + {OpAdd, "add line 2.4\n"}, + }, + LinesAdded: 1, + LinesDeleted: 3, + LeadingContext: 0, + TrailingContext: 0, + }, + }, + }, + "bothChangeEol": { + Input: `- remove line 1.1 + -remove line 2.1 +++add line 1.1,2.2`, + Fragments: []TextFragment{ + {OldLines: 1, NewLines: 1}, + {OldLines: 1, NewLines: 1}, + }, + Output: []TextFragment{ + { + OldLines: 1, + NewLines: 1, + Lines: []Line{ + {OpDelete, "remove line 1.1\n"}, + {OpContext, "remove line 2.1\n"}, + {OpAdd, "add line 1.1,2.2"}, + }, + LinesAdded: 1, + LinesDeleted: 1, + LeadingContext: 0, + TrailingContext: 0, + }, + { + OldLines: 1, + NewLines: 1, + Lines: []Line{ + {OpContext, "remove line 1.1\n"}, + {OpDelete, "remove line 2.1\n"}, + {OpAdd, "add line 1.1,2.2"}, + }, + LinesAdded: 1, + LinesDeleted: 1, + LeadingContext: 0, + TrailingContext: 0, + }, + }, + }, + "doubleRemove": { + Input: `--line 1.1,2.1 +- line 1.2 + -line 2.2 +`, + Fragments: []TextFragment{ + {OldLines: 2, NewLines: 0}, + {OldLines: 2, NewLines: 0}, + }, + Output: []TextFragment{ + { + OldLines: 2, + NewLines: 0, + Lines: []Line{ + {OpDelete, "line 1.1,2.1\n"}, + {OpDelete, "line 1.2\n"}, + {OpContext, "line 2.2\n"}, + }, + LinesAdded: 0, + LinesDeleted: 2, + LeadingContext: 0, + TrailingContext: 0, + }, + { + OldLines: 2, + NewLines: 0, + Lines: []Line{ + {OpDelete, "line 1.1,2.1\n"}, + {OpContext, "line 1.2\n"}, + {OpDelete, "line 2.2\n"}, + }, + LinesAdded: 0, + LinesDeleted: 2, + LeadingContext: 0, + TrailingContext: 0, + }, + }, + }, + "doubleAddAdd": { + Input: `++line 1.1,2.1 ++ line 1.2 +`, + Fragments: []TextFragment{ + {OldLines: 0, NewLines: 2}, + {OldLines: 1, NewLines: 2}, + }, + Output: []TextFragment{ + { + OldLines: 0, + NewLines: 2, + Lines: []Line{ + {OpAdd, "line 1.1,2.1\n"}, + {OpAdd, "line 1.2\n"}, + }, + LinesAdded: 2, + LinesDeleted: 0, + LeadingContext: 0, + TrailingContext: 0, + }, + { + OldLines: 1, + NewLines: 2, + Lines: []Line{ + {OpAdd, "line 1.1,2.1\n"}, + {OpContext, "line 1.2\n"}, + }, + LinesAdded: 1, + LinesDeleted: 0, + LeadingContext: 0, + TrailingContext: 0, + }, + }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + p := newTestParser(test.Input, true) + + frags := joinFragmentPtr(test.Fragments) + err := p.ParseCombinedTextChunk(frags) + if test.Err { + if err == nil || err == io.EOF { + t.Fatalf("expected error parsing text chunk, but got %v", err) + } + return + } + if err != nil { + t.Fatalf("error parsing text chunk: %v", err) + } + + if !reflect.DeepEqual(test.Output, reduceFragmentPtr(frags)) { + t.Errorf("incorrect fragment\nexpected: %+v\nactual: %+v", test.Output, reduceFragmentPtr(frags)) + } + }) + } +} + +func TestCombinedParseTextFragments(t *testing.T) { + tests := map[string]struct { + Input string + File File + + Fragments []TextFragment + Err string + }{ + "multipleChanges": { + Input: `@@@ -54,0 -52,2 +51,3 @@@ file 1 +++line 1.1.1,1.2.1 ++ line 1.1.1 ++ line 1.1.2 +@@@ -134,1 -136,1 +136,0 @@@ file 2 +- line 2.1.1 + -line 2.2.1 +`, + Fragments: []TextFragment{ + { + Comment: "file 1", + OldPosition: 54, + OldLines: 0, + NewPosition: 51, + NewLines: 3, + Lines: []Line{ + {OpAdd, "line 1.1.1,1.2.1\n"}, + {OpAdd, "line 1.1.1\n"}, + {OpAdd, "line 1.1.2\n"}, + }, + LinesAdded: 3, + LinesDeleted: 0, + LeadingContext: 0, + TrailingContext: 0, + }, + { + Comment: "file 1", + OldPosition: 52, + OldLines: 2, + NewPosition: 51, + NewLines: 3, + Lines: []Line{ + {OpAdd, "line 1.1.1,1.2.1\n"}, + {OpContext, "line 1.1.1\n"}, + {OpContext, "line 1.1.2\n"}, + }, + LinesDeleted: 0, + LinesAdded: 1, + LeadingContext: 0, + TrailingContext: 0, + }, + { + Comment: "file 2", + OldPosition: 134, + OldLines: 1, + NewPosition: 136, + NewLines: 0, + Lines: []Line{ + {OpDelete, "line 2.1.1\n"}, + {OpContext, "line 2.2.1\n"}, + }, + LinesDeleted: 1, + LinesAdded: 0, + LeadingContext: 0, + TrailingContext: 0, + }, + { + Comment: "file 2", + OldPosition: 136, + OldLines: 1, + NewPosition: 136, + NewLines: 0, + Lines: []Line{ + {OpContext, "line 2.1.1\n"}, + {OpDelete, "line 2.2.1\n"}, + }, + LinesDeleted: 1, + LinesAdded: 0, + LeadingContext: 0, + TrailingContext: 0, + }, + }, + }, + "strangeCount": { + // This test is why the implementation can't rely upon the lines changed. + // Its values are off by 1, according to the description of the meaning in the + // documentation. + Input: `@@@ -225,4 -237,2 +225,6 @@@ C0 + +line 1 + +line 2 + +line 3 + +line 4 +- line 5 +++line 6 +++line 7 ++ line 8 + -line 9 + -line 10 +`, + Fragments: []TextFragment{ + { + Comment: "C0", + OldPosition: 225, + OldLines: 4, + NewPosition: 225, + NewLines: 6, + Lines: []Line{ + {OpContext, "line 1\n"}, + {OpContext, "line 2\n"}, + {OpContext, "line 3\n"}, + {OpContext, "line 4\n"}, + {OpDelete, "line 5\n"}, + {OpAdd, "line 6\n"}, + {OpAdd, "line 7\n"}, + {OpAdd, "line 8\n"}, + {OpContext, "line 9\n"}, + {OpContext, "line 10\n"}, + }, + LinesDeleted: 1, + LinesAdded: 3, + }, + { + Comment: "C0", + OldPosition: 237, + OldLines: 2, + NewPosition: 225, + NewLines: 6, + Lines: []Line{ + {OpAdd, "line 1\n"}, + {OpAdd, "line 2\n"}, + {OpAdd, "line 3\n"}, + {OpAdd, "line 4\n"}, + {OpContext, "line 5\n"}, + {OpAdd, "line 6\n"}, + {OpAdd, "line 7\n"}, + {OpContext, "line 8\n"}, + {OpDelete, "line 9\n"}, + {OpDelete, "line 10\n"}, + }, + LinesDeleted: 2, + LinesAdded: 6, + }, + }, + }, + "negativeLines1": { + Input: `@@@ -229,4 -230,18446744073709551615 +228,3 @@@ C1 + +line 1 + +line 2 + +line 3 + +line 4 +- line 5 +`, + Fragments: []TextFragment{ + { + Comment: "C1", + OldPosition: 229, + OldLines: 5, + NewPosition: 228, + NewLines: 4, + Lines: []Line{ + {OpContext, "line 1\n"}, + {OpContext, "line 2\n"}, + {OpContext, "line 3\n"}, + {OpContext, "line 4\n"}, + {OpDelete, "line 5\n"}, + }, + LinesDeleted: 1, + LinesAdded: 0, + }, + { + Comment: "C1", + OldPosition: 230, + OldLines: 0, + NewPosition: 228, + NewLines: 4, + Lines: []Line{ + {OpAdd, "line 1\n"}, + {OpAdd, "line 2\n"}, + {OpAdd, "line 3\n"}, + {OpAdd, "line 4\n"}, + {OpContext, "line 5\n"}, + }, + LinesDeleted: 0, + LinesAdded: 4, + }, + }, + }, + "negativeLines2": { + Input: `@@@ -232,18446744073709551615 -227,1 +230,1 @@@ +++line 1 ++ line 2 + -line 3 +`, + Fragments: []TextFragment{ + { + OldPosition: 232, + OldLines: 0, + NewPosition: 230, + NewLines: 2, + Lines: []Line{ + {OpAdd, "line 1\n"}, + {OpAdd, "line 2\n"}, + {OpContext, "line 3\n"}, + }, + LinesDeleted: 0, + LinesAdded: 2, + }, + { + OldPosition: 227, + OldLines: 2, + NewPosition: 230, + NewLines: 2, + Lines: []Line{ + {OpAdd, "line 1\n"}, + {OpContext, "line 2\n"}, + {OpDelete, "line 3\n"}, + }, + LinesDeleted: 1, + LinesAdded: 1, + }, + }, + }, + "negativeMergedLines": { + Input: `@@@ -1,2 -1,2 +1,18446744073709551615 @@@ +--line 1 +--line 2 +--line 3 +`, + File: File{IsDelete: true}, + Fragments: []TextFragment{ + { + OldPosition: 1, + OldLines: 3, + NewPosition: 1, + NewLines: 0, + Lines: []Line{ + {OpDelete, "line 1\n"}, + {OpDelete, "line 2\n"}, + {OpDelete, "line 3\n"}, + }, + LinesDeleted: 3, + LinesAdded: 0, + }, + { + OldPosition: 1, + OldLines: 3, + NewPosition: 1, + NewLines: 0, + Lines: []Line{ + {OpDelete, "line 1\n"}, + {OpDelete, "line 2\n"}, + {OpDelete, "line 3\n"}, + }, + LinesDeleted: 3, + LinesAdded: 0, + }, + }, + }, + "badNewFile": { + Input: `@@@@ -1 -1 -1 +1,2 @@@@ +---old line 1 ++ new line 1 ++ new line 2 + `, + File: File{ + IsNew: true, + }, + Err: "gitdiff: line 1: new file depends on old contents", + }, + "badDeletedFile": { + Input: `@@@ -1,2 -1 +1 @@@ +--old line 1 + context line + `, + File: File{ + IsDelete: true, + }, + Err: "gitdiff: line 1: deleted file still has contents", + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + p := newTestParser(test.Input, true) + + file := test.File + n, err := p.ParseCombinedTextFragments(&file) + if test.Err != "" { + if err == nil || err == io.EOF { + t.Fatalf("expected error parsing text fragments, but returned %v", err) + } + if test.Err != err.Error() { + t.Fatalf("Incorrect error text: expected %v, actual %v", test.Err, err) + } + return + } + if err != nil { + t.Fatalf("error parsing text fragments: %v", err) + } + + if len(test.Fragments) != n { + t.Fatalf("incorrect number of added fragments: expected %d, actual %d", len(test.Fragments), n) + } + + for i, frag := range test.Fragments { + if !reflect.DeepEqual(&frag, file.TextFragments[i]) { + t.Errorf("incorrect fragment at position %d\nexpected: %+v\nactual: %+v", i, frag, *file.TextFragments[i]) + } + } + }) + } +} + +func joinFragmentPtr(frags []TextFragment) []*TextFragment { + ret := make([]*TextFragment, len(frags)) + for i, f := range frags { + r := copyFragment(&f) + ret[i] = &r + } + return ret +} + +func reduceFragmentPtr(frags []*TextFragment) []TextFragment { + ret := make([]TextFragment, len(frags)) + for i, f := range frags { + r := copyFragment(f) + ret[i] = r + } + return ret +} + +func copyFragment(f *TextFragment) TextFragment { + return TextFragment{ + Comment: f.Comment, + OldPosition: f.OldPosition, + OldLines: f.OldLines, + NewPosition: f.NewPosition, + NewLines: f.NewLines, + LinesAdded: f.LinesAdded, + LinesDeleted: f.LinesDeleted, + LeadingContext: f.LeadingContext, + TrailingContext: f.TrailingContext, + Lines: f.Lines, + } +} diff --git a/gitdiff/file_header.go b/gitdiff/file_header.go index 58904b4..214454f 100644 --- a/gitdiff/file_header.go +++ b/gitdiff/file_header.go @@ -29,6 +29,7 @@ func (p *parser) ParseNextFileHeader() (*File, string, error) { if frag != nil { return nil, "", p.Errorf(-1, "patch fragment without file header: %s", frag.Header()) } + // note: not checking for disconnected combined fragment header. // check for a git-generated patch file, err = p.ParseGitFileHeader() @@ -61,12 +62,17 @@ func (p *parser) ParseNextFileHeader() (*File, string, error) { } func (p *parser) ParseGitFileHeader() (*File, error) { - const prefix = "diff --git " - - if !strings.HasPrefix(p.Line(0), prefix) { + const prefix1 = "diff --git " + const prefix2 = "diff --cc " + var header string + + if strings.HasPrefix(p.Line(0), prefix1) { + header = p.Line(0)[len(prefix1):] + } else if strings.HasPrefix(p.Line(0), prefix2) { + header = p.Line(0)[len(prefix2):] + } else { return nil, nil } - header := p.Line(0)[len(prefix):] defaultName, err := parseGitHeaderName(header) if err != nil { @@ -270,6 +276,11 @@ func parseGitHeaderData(f *File, line, defaultName string) (end bool, err error) parse func(*File, string, string) error }{ {"@@ -", true, nil}, + // TODO '@@@ -' is the simplest form of the combined diff; it + // could be any number of '@' - one per parent + 1 '@'. + // The fix involves adding a separate '@' count discovery + // before this block (as '@' is the most commonly encountered structure). + {"@@@ -", true, nil}, {"--- ", false, parseGitHeaderOldName}, {"+++ ", false, parseGitHeaderNewName}, {"old mode ", false, parseGitHeaderOldMode}, diff --git a/gitdiff/parser.go b/gitdiff/parser.go index 5ffa9bd..1008bae 100644 --- a/gitdiff/parser.go +++ b/gitdiff/parser.go @@ -51,6 +51,7 @@ func Parse(r io.Reader) (<-chan *File, error) { for _, fn := range []func(*File) (int, error){ p.ParseTextFragments, + p.ParseCombinedTextFragments, p.ParseBinaryFragments, } { n, err := fn(file) diff --git a/gitdiff/parser_test.go b/gitdiff/parser_test.go index 5040b0a..3060485 100644 --- a/gitdiff/parser_test.go +++ b/gitdiff/parser_test.go @@ -96,6 +96,20 @@ index 9540595..30e6333 100644 +++ b/dir/file.txt @@ -1,2 +1,3 @@ context line +`, + Parse: func(p *parser) error { + _, err := p.ParseGitFileHeader() + return err + }, + EndLine: "@@ -1,2 +1,3 @@\n", + }, + "ParseCCFileHeader": { + Input: `diff --cc a/dir/file.txt b/dir/file.txt +index 9540595..30e6333 100644 +--- a/dir/file.txt ++++ b/dir/file.txt +@@ -1,2 +1,3 @@ +context line `, Parse: func(p *parser) error { _, err := p.ParseGitFileHeader() @@ -493,6 +507,173 @@ Date: Tue Apr 2 22:55:40 2019 -0700 }, Preamble: binaryPreamble, }, + "threeCombinedFiles": { + InputFile: "testdata/three_combined_files.patch", + Output: []*File{ + { + PatchHeader: &PatchHeader{ + SHA: "5d9790fec7d95aa223f3d20936340bf55ff3dcbe", + Author: &PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny@example.com", + }, + AuthorDate: asTime("2019-04-02T22:55:40-07:00"), + Title: "A file with multiple fragments, including multiple parents.", + Body: "The content is arbitrary.", + }, + OldName: "dir/file1.txt", + NewName: "dir/file1.txt", + OldMode: os.FileMode(0100644), + OldOIDPrefix: "ebe9fa54", + NewOIDPrefix: "fe103e1d", + TextFragments: textFragments, + }, + { + PatchHeader: &PatchHeader{ + SHA: "5d9790fec7d95aa223f3d20936340bf55ff3dcbe", + Author: &PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny@example.com", + }, + AuthorDate: asTime("2019-04-02T22:55:40-07:00"), + Title: "A file with multiple fragments, including multiple parents.", + Body: "The content is arbitrary.", + }, + OldName: "dir/file2.txt", + NewName: "dir/file2.txt", + OldMode: os.FileMode(0100644), + OldOIDPrefix: "417ebc70", + NewOIDPrefix: "67514b7f", + TextFragments: []*TextFragment{ + { + OldPosition: 3, + OldLines: 3, + NewPosition: 3, + NewLines: 0, + Comment: "fragment 1", + Lines: []Line{ + {OpContext, "context line\n"}, + {OpDelete, "old line 1\n"}, + {OpContext, "old line 2\n"}, + {OpContext, "context line\n"}, + }, + LinesAdded: 0, + LinesDeleted: 1, + LeadingContext: 1, + TrailingContext: 1, + }, + { + OldPosition: 2, + OldLines: 3, + NewPosition: 3, + NewLines: 0, + Comment: "fragment 1", + Lines: []Line{ + {OpContext, "context line\n"}, + {OpContext, "old line 1\n"}, + {OpDelete, "old line 2\n"}, + {OpContext, "context line\n"}, + }, + LinesAdded: 0, + LinesDeleted: 1, + LeadingContext: 1, + TrailingContext: 1, + }, + { + OldPosition: 31, + OldLines: 2, + NewPosition: 33, + NewLines: 1, + Comment: "fragment 2", + Lines: []Line{ + {OpContext, "context line\n"}, + {OpDelete, "old line 1\n"}, + {OpAdd, "new line 2\n"}, + }, + LinesAdded: 1, + LinesDeleted: 1, + LeadingContext: 1, + }, + { + OldPosition: 30, + OldLines: 2, + NewPosition: 33, + NewLines: 1, + Comment: "fragment 2", + Lines: []Line{ + {OpContext, "context line\n"}, + {OpContext, "old line 1\n"}, + {OpContext, "new line 2\n"}, + }, + LinesAdded: 0, + LinesDeleted: 0, + LeadingContext: 1, + }, + }, + }, + { + PatchHeader: &PatchHeader{ + SHA: "5d9790fec7d95aa223f3d20936340bf55ff3dcbe", + Author: &PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny@example.com", + }, + AuthorDate: asTime("2019-04-02T22:55:40-07:00"), + Title: "A file with multiple fragments, including multiple parents.", + Body: "The content is arbitrary.", + }, + OldName: "dir/file3.txt", + NewName: "dir/file3.txt", + OldMode: os.FileMode(0100644), + OldOIDPrefix: "1b39fa54", + NewOIDPrefix: "f5103e1d", + TextFragments: textFragments, + }, + }, + Preamble: textPreamble, + }, + "emptyCommit": { + InputFile: "testdata/empty-commit.patch", + Output: []*File{ + { + PatchHeader: &PatchHeader{ + SHA: "48f13f61afb200ad8386573632cba3abd1703af2", + Author: &PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny@example.com", + }, + AuthorDate: asTime("2017-11-03T12:29:49+00:00"), + Title: "A simple change", + Body: "The change is simple.", + }, + OldName: "dir/file1.txt", + NewName: "dir/file1.txt", + OldMode: os.FileMode(0100644), + OldOIDPrefix: "422cce7", + NewOIDPrefix: "24b39ed", + TextFragments: textFragments, + }, + { + PatchHeader: &PatchHeader{ + SHA: "183d9cdbecda47e6e95acf9e4e23fa3a71ba99ad", + Author: &PatchIdentity{ + Name: "Regina Smithee", + Email: "rsmithee@example.com", + }, + AuthorDate: asTime("2017-09-14T19:46:12+00:00"), + Title: "Simple change", + Body: "Simple change body.", + }, + OldName: "d1/file2.txt", + NewName: "d1/file2.txt", + OldMode: os.FileMode(0100755), + OldOIDPrefix: "c6513ff", + NewOIDPrefix: "bf4c6cc", + TextFragments: textFragments, + }, + }, + Preamble: textPreamble, + }, } for name, test := range tests { @@ -595,7 +776,7 @@ func newTestParser(input string, init bool) *parser { } func asTime(s string) time.Time { - t, err := time.Parse(time.RFC3339, s) + t, err := time.ParseInLocation(time.RFC3339, s, time.Local) if err != nil { panic(err) } diff --git a/gitdiff/patch_header.go b/gitdiff/patch_header.go index a4671b7..e944667 100644 --- a/gitdiff/patch_header.go +++ b/gitdiff/patch_header.go @@ -294,10 +294,14 @@ func parseHeaderPretty(prettyLine string, r io.Reader) (*PatchHeader, error) { if title != "" { // Don't check for an appendix - body, _ := scanMessageBody(s, indent, false) + body, _, remainder := scanMessageBody(s, indent, false) if s.Err() != nil { return nil, s.Err() } + if remainder != "" { + // There was another header immediately after this one. + return ParsePatchHeader(remainder) + } h.Body = body } @@ -326,15 +330,15 @@ func scanMessageTitle(s *bufio.Scanner) (title string, indent string) { return b.String(), indent } -func scanMessageBody(s *bufio.Scanner, indent string, separateAppendix bool) (string, string) { +func scanMessageBody(s *bufio.Scanner, indent string, separateAppendix bool) (string, string, string) { // Body and appendix var body, appendix strings.Builder c := &body var empty int for i := 0; s.Scan(); i++ { - line := s.Text() + baseLine := s.Text() - line = strings.TrimRightFunc(line, unicode.IsSpace) + line := strings.TrimRightFunc(baseLine, unicode.IsSpace) line = strings.TrimPrefix(line, indent) if line == "" { @@ -342,6 +346,18 @@ func scanMessageBody(s *bufio.Scanner, indent string, separateAppendix bool) (st continue } + if baseLine == line && indent != "" { + // The line does not start with the indent. + var remainder strings.Builder + remainder.WriteString(baseLine) + remainder.WriteByte('\n') + for ; s.Scan(); i++ { + remainder.WriteString(s.Text()) + remainder.WriteByte('\n') + } + return body.String(), appendix.String(), remainder.String() + } + // If requested, parse out "appendix" information (often added // by `git format-patch` and removed by `git am`). if separateAppendix && c == &body && line == "---" { @@ -359,7 +375,7 @@ func scanMessageBody(s *bufio.Scanner, indent string, separateAppendix bool) (st c.WriteString(line) } - return body.String(), appendix.String() + return body.String(), appendix.String(), "" } func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) { @@ -402,7 +418,7 @@ func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) { h.SubjectPrefix, h.Title = parseSubject(subject) s := bufio.NewScanner(msg.Body) - h.Body, h.BodyAppendix = scanMessageBody(s, "", true) + h.Body, h.BodyAppendix, _ = scanMessageBody(s, "", true) if s.Err() != nil { return nil, s.Err() } diff --git a/gitdiff/patch_header_test.go b/gitdiff/patch_header_test.go index 9181b5c..1450535 100644 --- a/gitdiff/patch_header_test.go +++ b/gitdiff/patch_header_test.go @@ -441,6 +441,39 @@ Author: Morton Haypenny Title: expectedTitle, }, }, + "emptyCommit": { + Input: `commit 989f970ba7e43cd0eac6fcf71acfd9c92effc047 +Author: Morton Haypenny +Date: Sat Apr 12 05:20:49 2020 -0700 + + An empty commit + + With a body. + + +commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b +Author: Morton Haypenny +Date: Sat Apr 11 15:21:23 2020 -0700 + + A sample commit to test header parsing + + + The medium format shows the body, which + may wrap on to multiple lines. + + + Another body line. + + +`, + Header: PatchHeader{ + SHA: expectedSHA, + Author: expectedIdentity, + AuthorDate: expectedDate, + Title: expectedTitle, + Body: expectedBody, + }, + }, } for name, test := range tests { diff --git a/gitdiff/testdata/empty-commit.patch b/gitdiff/testdata/empty-commit.patch new file mode 100644 index 0000000..8896704 --- /dev/null +++ b/gitdiff/testdata/empty-commit.patch @@ -0,0 +1,83 @@ + +commit 48f13f61afb200ad8386573632cba3abd1703af2 +Author: Morton Haypenny +Date: Thu Nov 3 12:29:49 2017 +0000 + + A simple change + + The change is simple. + +diff --git a/dir/file1.txt b/dir/file1.txt +index 422cce7..24b39ed 100644 +--- a/dir/file1.txt ++++ b/dir/file1.txt +@@ -3,6 +3,8 @@ fragment 1 + context line +-old line 1 +-old line 2 + context line ++new line 1 ++new line 2 ++new line 3 + context line +-old line 3 ++new line 4 ++new line 5 +@@ -31,2 +33,2 @@ fragment 2 + context line +-old line 4 ++new line 6 + +commit dbbc5f1e926391d737e397ec60cc1ff94787e105 +Author: Regina Smithee +Date: Thu Sep 29 12:58:35 2017 +0000 + + Merged commit 3 + + Merged body 3. + + +commit f90c2e23af6618388cb4082f39849476e39105af +Merge: 989f970 e84c7ab +Author: Regina Smithee +Date: Thu Sep 15 16:37:30 2017 +0000 + + Merged commit 2 + + +commit 989f970ba7e43cd0eac6fcf71acfd9c92effc047 +Merge: 183d9cd e84c7ab +Author: Regina Smithee +Date: Thu Sep 15 14:41:57 2017 +0000 + + Merged commit 1 + + +commit 183d9cdbecda47e6e95acf9e4e23fa3a71ba99ad +Author: Regina Smithee +Date: Wed Sep 14 19:46:12 2017 +0000 + + Simple change + + Simple change body. + +diff --git a/d1/file2.txt b/d1/file2.txt +index c6513ff..bf4c6cc 100755 +--- a/d1/file2.txt ++++ b/d1/file2.txt +@@ -3,6 +3,8 @@ fragment 1 + context line +-old line 1 +-old line 2 + context line ++new line 1 ++new line 2 ++new line 3 + context line +-old line 3 ++new line 4 ++new line 5 +@@ -31,2 +33,2 @@ fragment 2 + context line +-old line 4 ++new line 6 diff --git a/gitdiff/testdata/three_combined_files.patch b/gitdiff/testdata/three_combined_files.patch new file mode 100644 index 0000000..e1bb7be --- /dev/null +++ b/gitdiff/testdata/three_combined_files.patch @@ -0,0 +1,61 @@ +commit 5d9790fec7d95aa223f3d20936340bf55ff3dcbe +Author: Morton Haypenny +Date: Tue Apr 2 22:55:40 2019 -0700 + + A file with multiple fragments, including multiple parents. + + The content is arbitrary. + +diff --git a/dir/file1.txt b/dir/file1.txt +index ebe9fa54..fe103e1d 100644 +--- a/dir/file1.txt ++++ b/dir/file1.txt +@@ -3,6 +3,8 @@ fragment 1 + context line +-old line 1 +-old line 2 + context line ++new line 1 ++new line 2 ++new line 3 + context line +-old line 3 ++new line 4 ++new line 5 +@@ -31,2 +33,2 @@ fragment 2 + context line +-old line 4 ++new line 6 +diff --cc dir/file2.txt +index 417ebc70..67514b7f 100644 +--- a/dir/file2.txt ++++ b/dir/file2.txt +@@@ -3,3 -2,3 +3,0 @@@ fragment 1 + context line +- old line 1 + -old line 2 + context line +@@@ -31,2 -30,2 +33,1 @@@ fragment 2 + context line +- old line 1 ++ new line 2 +diff --git a/dir/file3.txt b/dir/file3.txt +index 1b39fa54..f5103e1d 100644 +--- a/dir/file3.txt ++++ b/dir/file3.txt +@@ -3,6 +3,8 @@ fragment 1 + context line +-old line 1 +-old line 2 + context line ++new line 1 ++new line 2 ++new line 3 + context line +-old line 3 ++new line 4 ++new line 5 +@@ -31,2 +33,2 @@ fragment 2 + context line +-old line 4 ++new line 6 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..e69de29