Bump go-git

This commit is contained in:
Stefan Haller 2025-04-09 10:38:46 +02:00
parent da0105c16b
commit 4cf49ff449
527 changed files with 70489 additions and 10167 deletions

View file

@ -120,7 +120,7 @@ func (dmp *DiffMatchPatch) diffMainRunes(text1, text2 []rune, checklines bool, d
// Restore the prefix and suffix.
if len(commonprefix) != 0 {
diffs = append([]Diff{Diff{DiffEqual, string(commonprefix)}}, diffs...)
diffs = append([]Diff{{DiffEqual, string(commonprefix)}}, diffs...)
}
if len(commonsuffix) != 0 {
diffs = append(diffs, Diff{DiffEqual, string(commonsuffix)})
@ -165,8 +165,8 @@ func (dmp *DiffMatchPatch) diffCompute(text1, text2 []rune, checklines bool, dea
// Single character string.
// After the previous speedup, the character can't be an equality.
return []Diff{
Diff{DiffDelete, string(text1)},
Diff{DiffInsert, string(text2)},
{DiffDelete, string(text1)},
{DiffInsert, string(text2)},
}
// Check to see if the problem can be split in two.
} else if hm := dmp.diffHalfMatch(text1, text2); hm != nil {
@ -193,7 +193,7 @@ func (dmp *DiffMatchPatch) diffCompute(text1, text2 []rune, checklines bool, dea
// diffLineMode does a quick line-level diff on both []runes, then rediff the parts for greater accuracy. This speedup can produce non-minimal diffs.
func (dmp *DiffMatchPatch) diffLineMode(text1, text2 []rune, deadline time.Time) []Diff {
// Scan the text on a line-by-line basis first.
text1, text2, linearray := dmp.diffLinesToRunes(text1, text2)
text1, text2, linearray := dmp.DiffLinesToRunes(string(text1), string(text2))
diffs := dmp.diffMainRunes(text1, text2, false, deadline)
@ -368,8 +368,8 @@ func (dmp *DiffMatchPatch) diffBisect(runes1, runes2 []rune, deadline time.Time)
}
// Diff took too long and hit the deadline or number of diffs equals number of characters, no commonality at all.
return []Diff{
Diff{DiffDelete, string(runes1)},
Diff{DiffInsert, string(runes2)},
{DiffDelete, string(runes1)},
{DiffInsert, string(runes2)},
}
}
@ -390,66 +390,25 @@ func (dmp *DiffMatchPatch) diffBisectSplit(runes1, runes2 []rune, x, y int,
// DiffLinesToChars splits two texts into a list of strings, and reduces the texts to a string of hashes where each Unicode character represents one line.
// It's slightly faster to call DiffLinesToRunes first, followed by DiffMainRunes.
func (dmp *DiffMatchPatch) DiffLinesToChars(text1, text2 string) (string, string, []string) {
chars1, chars2, lineArray := dmp.DiffLinesToRunes(text1, text2)
return string(chars1), string(chars2), lineArray
}
// DiffLinesToRunes splits two texts into a list of runes. Each rune represents one line.
func (dmp *DiffMatchPatch) DiffLinesToRunes(text1, text2 string) ([]rune, []rune, []string) {
// '\x00' is a valid character, but various debuggers don't like it. So we'll insert a junk entry to avoid generating a null character.
lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n'
lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4
chars1 := dmp.diffLinesToRunesMunge(text1, &lineArray, lineHash)
chars2 := dmp.diffLinesToRunesMunge(text2, &lineArray, lineHash)
chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2)
return chars1, chars2, lineArray
}
func (dmp *DiffMatchPatch) diffLinesToRunes(text1, text2 []rune) ([]rune, []rune, []string) {
return dmp.DiffLinesToRunes(string(text1), string(text2))
}
// diffLinesToRunesMunge splits a text into an array of strings, and reduces the texts to a []rune where each Unicode character represents one line.
// We use strings instead of []runes as input mainly because you can't use []rune as a map key.
func (dmp *DiffMatchPatch) diffLinesToRunesMunge(text string, lineArray *[]string, lineHash map[string]int) []rune {
// Walk the text, pulling out a substring for each line. text.split('\n') would would temporarily double our memory footprint. Modifying text would create many large strings to garbage collect.
lineStart := 0
lineEnd := -1
runes := []rune{}
for lineEnd < len(text)-1 {
lineEnd = indexOf(text, "\n", lineStart)
if lineEnd == -1 {
lineEnd = len(text) - 1
}
line := text[lineStart : lineEnd+1]
lineStart = lineEnd + 1
lineValue, ok := lineHash[line]
if ok {
runes = append(runes, rune(lineValue))
} else {
*lineArray = append(*lineArray, line)
lineHash[line] = len(*lineArray) - 1
runes = append(runes, rune(len(*lineArray)-1))
}
}
return runes
// DiffLinesToRunes splits two texts into a list of runes.
func (dmp *DiffMatchPatch) DiffLinesToRunes(text1, text2 string) ([]rune, []rune, []string) {
chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2)
return []rune(chars1), []rune(chars2), lineArray
}
// DiffCharsToLines rehydrates the text in a diff from a string of line hashes to real lines of text.
func (dmp *DiffMatchPatch) DiffCharsToLines(diffs []Diff, lineArray []string) []Diff {
hydrated := make([]Diff, 0, len(diffs))
for _, aDiff := range diffs {
chars := aDiff.Text
text := make([]string, len(chars))
runes := []rune(aDiff.Text)
text := make([]string, len(runes))
for i, r := range chars {
text[i] = lineArray[r]
for i, r := range runes {
text[i] = lineArray[runeToInt(r)]
}
aDiff.Text = strings.Join(text, "")
@ -670,16 +629,16 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
// An insertion or deletion.
if diffs[pointer].Type == DiffInsert {
lengthInsertions2 += len(diffs[pointer].Text)
lengthInsertions2 += utf8.RuneCountInString(diffs[pointer].Text)
} else {
lengthDeletions2 += len(diffs[pointer].Text)
lengthDeletions2 += utf8.RuneCountInString(diffs[pointer].Text)
}
// Eliminate an equality that is smaller or equal to the edits on both sides of it.
difference1 := int(math.Max(float64(lengthInsertions1), float64(lengthDeletions1)))
difference2 := int(math.Max(float64(lengthInsertions2), float64(lengthDeletions2)))
if len(lastequality) > 0 &&
(len(lastequality) <= difference1) &&
(len(lastequality) <= difference2) {
if utf8.RuneCountInString(lastequality) > 0 &&
(utf8.RuneCountInString(lastequality) <= difference1) &&
(utf8.RuneCountInString(lastequality) <= difference2) {
// Duplicate record.
insPoint := equalities[len(equalities)-1]
diffs = splice(diffs, insPoint, 0, Diff{DiffDelete, lastequality})
@ -728,8 +687,8 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
overlapLength1 := dmp.DiffCommonOverlap(deletion, insertion)
overlapLength2 := dmp.DiffCommonOverlap(insertion, deletion)
if overlapLength1 >= overlapLength2 {
if float64(overlapLength1) >= float64(len(deletion))/2 ||
float64(overlapLength1) >= float64(len(insertion))/2 {
if float64(overlapLength1) >= float64(utf8.RuneCountInString(deletion))/2 ||
float64(overlapLength1) >= float64(utf8.RuneCountInString(insertion))/2 {
// Overlap found. Insert an equality and trim the surrounding edits.
diffs = splice(diffs, pointer, 0, Diff{DiffEqual, insertion[:overlapLength1]})
@ -739,8 +698,8 @@ func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
pointer++
}
} else {
if float64(overlapLength2) >= float64(len(deletion))/2 ||
float64(overlapLength2) >= float64(len(insertion))/2 {
if float64(overlapLength2) >= float64(utf8.RuneCountInString(deletion))/2 ||
float64(overlapLength2) >= float64(utf8.RuneCountInString(insertion))/2 {
// Reverse overlap found. Insert an equality and swap and trim the surrounding edits.
overlap := Diff{DiffEqual, deletion[:overlapLength2]}
diffs = splice(diffs, pointer, 0, overlap)
@ -1029,7 +988,7 @@ func (dmp *DiffMatchPatch) DiffCleanupMerge(diffs []Diff) []Diff {
if x > 0 && diffs[x-1].Type == DiffEqual {
diffs[x-1].Text += string(textInsert[:commonlength])
} else {
diffs = append([]Diff{Diff{DiffEqual, string(textInsert[:commonlength])}}, diffs...)
diffs = append([]Diff{{DiffEqual, string(textInsert[:commonlength])}}, diffs...)
pointer++
}
textInsert = textInsert[commonlength:]
@ -1343,3 +1302,46 @@ func (dmp *DiffMatchPatch) DiffFromDelta(text1 string, delta string) (diffs []Di
return diffs, nil
}
// diffLinesToStrings breaks both texts into lines, records every distinct line
// once in a shared line table, and returns each text re-encoded as a string in
// which every rune stands for one line (see intArrayToString), together with
// that line table.
func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) (string, string, []string) {
	// Slot 0 is a junk entry so that no line is ever encoded as '\x00', which
	// is a valid character but upsets various debuggers.
	lines := []string{""}     // e.g. lines[4] == 'Hello\n'
	seen := map[string]int{}  // e.g. seen['Hello\n'] == 4

	// Both texts share the same table, so identical lines in either text map
	// to the same index.
	indices1 := dmp.diffLinesToStringsMunge(text1, &lines, seen)
	indices2 := dmp.diffLinesToStringsMunge(text2, &lines, seen)

	encoded1 := intArrayToString(indices1)
	encoded2 := intArrayToString(indices2)
	return encoded1, encoded2, lines
}
// diffLinesToStringsMunge walks text line by line and returns, for every line,
// its index in *lineArray. Lines not yet present in lineHash are appended to
// *lineArray and registered in lineHash, so both are updated in place.
// A line includes its trailing "\n"; the final line may lack one.
func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string, lineHash map[string]int) []uint32 {
	// Walk the text, pulling out a substring for each line. Splitting on '\n'
	// would temporarily double our memory footprint, and modifying text would
	// create many large strings to garbage collect.
	indices := []uint32{}
	last := len(text) - 1
	for start, end := 0, -1; end < last; start = end + 1 {
		end = indexOf(text, "\n", start)
		if end == -1 {
			// No further newline: the remainder of the text is the last line.
			end = last
		}
		line := text[start : end+1]

		idx, known := lineHash[line]
		if !known {
			*lineArray = append(*lineArray, line)
			idx = len(*lineArray) - 1
			lineHash[line] = idx
		}
		indices = append(indices, uint32(idx))
	}
	return indices
}

View file

@ -324,7 +324,7 @@ func (dmp *DiffMatchPatch) PatchAddPadding(patches []Patch) string {
paddingLength := dmp.PatchMargin
nullPadding := ""
for x := 1; x <= paddingLength; x++ {
nullPadding += string(x)
nullPadding += string(rune(x))
}
// Bump all the patches forward.

View file

@ -9,10 +9,16 @@
package diffmatchpatch
import (
"fmt"
"strings"
"unicode/utf8"
)
// UNICODE_INVALID_RANGE_START is the first code point of the range that
// intToRune skips over (0xD800-0xDFFF, the surrogate range, which cannot be
// encoded as valid UTF-8).
const UNICODE_INVALID_RANGE_START = 0xD800

// UNICODE_INVALID_RANGE_END is the last code point (inclusive) of the skipped range.
const UNICODE_INVALID_RANGE_END = 0xDFFF

// UNICODE_INVALID_RANGE_DELTA is the number of code points in the skipped
// range; integers at or above the range start are shifted by this amount.
const UNICODE_INVALID_RANGE_DELTA = UNICODE_INVALID_RANGE_END - UNICODE_INVALID_RANGE_START + 1

// UNICODE_RANGE_MAX is the largest Unicode code point.
// NOTE(review): no use of this constant is visible in this part of the file.
const UNICODE_RANGE_MAX = 0x10FFFF
// unescaper unescapes selected chars for compatibility with JavaScript's encodeURI.
// In speed critical applications this could be dropped since the receiving application will certainly decode these fine. Note that this function is case-sensitive. Thus "%3F" would not be unescaped. But this is ok because it is only called with the output of HttpUtility.UrlEncode which returns lowercase hex. Example: "%3f" -> "?", "%24" -> "$", etc.
var unescaper = strings.NewReplacer(
@ -86,3 +92,99 @@ func runesIndex(r1, r2 []rune) int {
}
return -1
}
// intArrayToString encodes a slice of integers as a string containing one rune
// per integer, using intToRune for the mapping. An empty or nil slice yields
// the empty string.
func intArrayToString(ns []uint32) string {
	if len(ns) == 0 {
		return ""
	}

	encoded := make([]rune, len(ns))
	for idx := range ns {
		encoded[idx] = intToRune(ns[idx])
	}
	return string(encoded)
}
// These constants define the number of bits representable
// in 1,2,3,4 byte utf8 sequences, respectively.
// intToRune and runeToInt use them (as 1 << N) to decide which encoding
// length an integer falls into.

// ONE_BYTE_BITS is the number of payload bits in a 1-byte UTF-8 sequence.
const ONE_BYTE_BITS = 7

// TWO_BYTE_BITS is the number of payload bits in a 2-byte UTF-8 sequence.
const TWO_BYTE_BITS = 11

// THREE_BYTE_BITS is the number of payload bits in a 3-byte UTF-8 sequence.
const THREE_BYTE_BITS = 16

// FOUR_BYTE_BITS is the number of payload bits in a 4-byte UTF-8 sequence.
const FOUR_BYTE_BITS = 21
// getBits extracts cnt consecutive bits from i, starting at bit position from
// (bit 0 being the least significant), and returns them in the low bits of a byte.
func getBits(i uint32, cnt byte, from byte) byte {
	var mask uint32 = (1 << cnt) - 1
	shifted := i >> from
	return byte(shifted & mask)
}
// intToRune converts an integer in the range 0~1112060 into a rune.
// Based on the ranges table in https://en.wikipedia.org/wiki/UTF-8
//
// Integers are mapped onto code points in increasing order, skipping the
// code points that cannot be used: the invalid range
// UNICODE_INVALID_RANGE_START..UNICODE_INVALID_RANGE_END and the last three
// code points of the 3-byte range (65533..65535). runeToInt is the inverse.
//
// It panics if the integer cannot be represented; the panic message always
// reports the integer that was passed in.
func intToRune(i uint32) rune {
	if i < (1 << ONE_BYTE_BITS) {
		return rune(i)
	}

	if i < (1 << TWO_BYTE_BITS) {
		r, size := utf8.DecodeRune([]byte{0b11000000 | getBits(i, 5, 6), 0b10000000 | getBits(i, 6, 0)})
		if size != 2 || r == utf8.RuneError {
			panic(fmt.Sprintf("Error encoding an int %d with size 2, got rune %v and size %d", i, r, size))
		}
		return r
	}

	// The trailing -3 excludes the last three code points of the 3-byte range.
	// Code point 65533 is utf8.RuneError itself, so decoding it successfully is
	// indistinguishable from a decoding failure in the check below.
	if i < ((1 << THREE_BYTE_BITS) - UNICODE_INVALID_RANGE_DELTA - 3) {
		// Work on a copy so the panic message can report the caller's value.
		cp := i
		if cp >= UNICODE_INVALID_RANGE_START {
			// Jump over the invalid range.
			cp += UNICODE_INVALID_RANGE_DELTA
		}

		r, size := utf8.DecodeRune([]byte{0b11100000 | getBits(cp, 4, 12), 0b10000000 | getBits(cp, 6, 6), 0b10000000 | getBits(cp, 6, 0)})
		if size != 3 || r == utf8.RuneError {
			panic(fmt.Sprintf("Error encoding an int %d with size 3, got rune %v and size %d", i, r, size))
		}
		return r
	}

	if i < (1<<FOUR_BYTE_BITS - UNICODE_INVALID_RANGE_DELTA - 3) {
		// Account for everything skipped in the 3-byte range.
		cp := i + UNICODE_INVALID_RANGE_DELTA + 3
		r, size := utf8.DecodeRune([]byte{0b11110000 | getBits(cp, 3, 18), 0b10000000 | getBits(cp, 6, 12), 0b10000000 | getBits(cp, 6, 6), 0b10000000 | getBits(cp, 6, 0)})
		if size != 4 || r == utf8.RuneError {
			panic(fmt.Sprintf("Error encoding an int %d with size 4, got rune %v and size %d", i, r, size))
		}
		return r
	}
	panic(fmt.Sprintf("The integer %d is too large for intToRune()", i))
}
// runeToInt converts a rune generated by intToRune back to the integer it was
// created from, undoing the offsets intToRune applies to skip unusable code
// points.
func runeToInt(r rune) uint32 {
	value := uint32(r)
	if value < (1 << ONE_BYTE_BITS) {
		return value
	}

	encoded := []byte{0, 0, 0, 0}
	size := utf8.EncodeRune(encoded, r)
	switch size {
	case 2:
		return uint32(encoded[0]&0b11111)<<6 | uint32(encoded[1]&0b111111)
	case 3:
		decoded := uint32(encoded[0]&0b1111)<<12 | uint32(encoded[1]&0b111111)<<6 | uint32(encoded[2]&0b111111)
		if decoded < UNICODE_INVALID_RANGE_END {
			return decoded
		}
		// Values past the invalid range were shifted up by intToRune.
		return decoded - UNICODE_INVALID_RANGE_DELTA
	case 4:
		decoded := uint32(encoded[0]&0b111)<<18 | uint32(encoded[1]&0b111111)<<12 | uint32(encoded[2]&0b111111)<<6 | uint32(encoded[3]&0b111111)
		return decoded - UNICODE_INVALID_RANGE_DELTA - 3
	}
	panic(fmt.Sprintf("Unexpected state decoding rune=%v size=%d", r, size))
}