Implement ScanLinesAndTruncateWhenLongerThanBuffer

This commit is contained in:
Stefan Haller 2024-04-27 17:16:06 +02:00
parent 6bb8c180b2
commit 66d0ce841c
2 changed files with 122 additions and 1 deletions

View file

@ -1,6 +1,9 @@
package utils package utils
import "strings" import (
"bytes"
"strings"
)
// SplitLines takes a multiline string and splits it on newlines
// currently we are also stripping \r's which may have adverse effects for
@ -43,3 +46,57 @@ func EscapeSpecialChars(str string) string {
"\v", "\\v", "\v", "\\v",
).Replace(str) ).Replace(str)
} }
// dropCR returns data without its last byte when that byte is a carriage
// return, so that a "\r\n" line ending produces the same token as "\n". At most
// one trailing '\r' is removed, and the result is a sub-slice of data.
func dropCR(data []byte) []byte {
	if len(data) > 0 && data[len(data)-1] == '\r' {
		return data[0 : len(data)-1]
	}
	return data
}

// ScanLinesAndTruncateWhenLongerThanBuffer returns a split function that can be
// used with bufio.Scanner.Split(). It is very similar to bufio.ScanLines,
// except that it will truncate lines that are longer than the scanner's read
// buffer (whereas bufio.ScanLines will return an error in that case, which is
// often difficult to handle).
//
// A truncated line is returned as a single token holding the first
// maxBufferSize bytes of the line; the rest of that line, up to and including
// its newline, is discarded. A trailing '\r' is stripped from every token,
// including truncated ones, so a "\r\n" that straddles the end of the buffer
// does not leave a stray carriage return in the token.
//
// If you are using your own buffer for the scanner, you must set maxBufferSize
// to the same value as the max parameter that you passed to scanner.Buffer().
// Otherwise, maxBufferSize must be set to bufio.MaxScanTokenSize.
//
// The returned function keeps state between calls (whether it is currently
// skipping the remainder of a long line), so obtain a fresh one for each
// scanner rather than sharing it.
func ScanLinesAndTruncateWhenLongerThanBuffer(maxBufferSize int) func(data []byte, atEOF bool) (int, []byte, error) {
	// True while the beginning of an over-long line has already been returned
	// as a token and the rest of that line still has to be thrown away.
	skipOverRemainderOfLongLine := false

	return func(data []byte, atEOF bool) (int, []byte, error) {
		if atEOF && len(data) == 0 {
			// Done
			return 0, nil, nil
		}

		if i := bytes.IndexByte(data, '\n'); i >= 0 {
			if skipOverRemainderOfLongLine {
				// This newline terminates the long line we were skipping;
				// consume it without producing a token.
				skipOverRemainderOfLongLine = false
				return i + 1, nil, nil
			}

			return i + 1, dropCR(data[0:i]), nil
		}

		if atEOF {
			// Final line without a terminating newline.
			if skipOverRemainderOfLongLine {
				return len(data), nil, nil
			}

			return len(data), dropCR(data), nil
		}

		// Buffer is full, so we can't get more data
		if len(data) >= maxBufferSize {
			if skipOverRemainderOfLongLine {
				// Still inside the long line; keep discarding.
				return len(data), nil, nil
			}

			skipOverRemainderOfLongLine = true
			// Strip a trailing '\r' here as well: if the line ending is "\r\n"
			// and the '\n' did not fit into the buffer anymore, the token
			// would otherwise end in a carriage return.
			return len(data), dropCR(data), nil
		}

		// Request more data.
		return 0, nil, nil
	}
}

View file

@ -1,6 +1,8 @@
package utils package utils
import ( import (
"bufio"
"strings"
"testing" "testing"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
@ -100,3 +102,65 @@ func TestNormalizeLinefeeds(t *testing.T) {
assert.EqualValues(t, string(s.expected), NormalizeLinefeeds(string(s.byteArray))) assert.EqualValues(t, string(s.expected), NormalizeLinefeeds(string(s.byteArray)))
} }
} }
// TestScanLinesAndTruncateWhenLongerThanBuffer runs each input through a
// bufio.Scanner whose buffer is limited to bufferSize bytes and that uses
// ScanLinesAndTruncateWhenLongerThanBuffer as its split function, then compares
// the scanned tokens with the expected lines.
func TestScanLinesAndTruncateWhenLongerThanBuffer(t *testing.T) {
	// Size of the scanner buffer; lines longer than this are cut off.
	const bufferSize = 5

	testCases := []struct {
		input         string
		expectedLines []string
	}{
		// Lines that fit into the buffer.
		{input: "", expectedLines: []string{}},
		{input: "\n", expectedLines: []string{""}},
		{input: "abc", expectedLines: []string{"abc"}},
		{input: "abc\ndef", expectedLines: []string{"abc", "def"}},
		{input: "abc\n\ndef", expectedLines: []string{"abc", "", "def"}},
		// Carriage returns at the end of a line are removed.
		{input: "abc\r\ndef\r", expectedLines: []string{"abc", "def"}},
		// Lines that are longer than the buffer are truncated.
		{input: "abcdef", expectedLines: []string{"abcde"}},
		{input: "abcdef\n", expectedLines: []string{"abcde"}},
		{input: "abcdef\nghijkl\nx", expectedLines: []string{"abcde", "ghijk", "x"}},
		{input: "abc\ndefghijklmnopqrstuvw\nx", expectedLines: []string{"abc", "defgh", "x"}},
	}

	for _, tc := range testCases {
		scanner := bufio.NewScanner(strings.NewReader(tc.input))
		scanner.Buffer(make([]byte, bufferSize), bufferSize)
		scanner.Split(ScanLinesAndTruncateWhenLongerThanBuffer(bufferSize))

		// Start from a non-nil empty slice so that an input without any
		// lines compares equal to the expected []string{}.
		lines := make([]string, 0)
		for scanner.Scan() {
			lines = append(lines, scanner.Text())
		}

		assert.NoError(t, scanner.Err())
		assert.EqualValues(t, tc.expectedLines, lines)
	}
}