From 66d0ce841c9d89bbca3663e4c51b59120838d074 Mon Sep 17 00:00:00 2001
From: Stefan Haller
Date: Sat, 27 Apr 2024 17:16:06 +0200
Subject: [PATCH] Implement ScanLinesAndTruncateWhenLongerThanBuffer

---
 pkg/utils/lines.go      | 59 ++++++++++++++++++++++++++++++++++++-
 pkg/utils/lines_test.go | 64 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 122 insertions(+), 1 deletion(-)

diff --git a/pkg/utils/lines.go b/pkg/utils/lines.go
index 662ba2f9b..c70d02ffc 100644
--- a/pkg/utils/lines.go
+++ b/pkg/utils/lines.go
@@ -1,6 +1,9 @@
 package utils
 
-import "strings"
+import (
+	"bytes"
+	"strings"
+)
 
 // SplitLines takes a multiline string and splits it on newlines
 // currently we are also stripping \r's which may have adverse effects for
@@ -43,3 +46,57 @@ func EscapeSpecialChars(str string) string {
 		"\v", "\\v",
 	).Replace(str)
 }
+
+func dropCR(data []byte) []byte {
+	if len(data) > 0 && data[len(data)-1] == '\r' {
+		return data[0 : len(data)-1]
+	}
+	return data
+}
+
+// ScanLinesAndTruncateWhenLongerThanBuffer returns a split function that can be
+// used with bufio.Scanner.Split(). It is very similar to bufio.ScanLines,
+// except that it will truncate lines that are longer than the scanner's read
+// buffer (whereas bufio.ScanLines will return an error in that case, which is
+// often difficult to handle).
+//
+// If you are using your own buffer for the scanner, you must set maxBufferSize
+// to the same value as the max parameter that you passed to scanner.Buffer().
+// Otherwise, maxBufferSize must be set to bufio.MaxScanTokenSize.
+func ScanLinesAndTruncateWhenLongerThanBuffer(maxBufferSize int) func(data []byte, atEOF bool) (int, []byte, error) {
+	skipOverRemainderOfLongLine := false
+
+	return func(data []byte, atEOF bool) (int, []byte, error) {
+		if atEOF && len(data) == 0 {
+			// Done
+			return 0, nil, nil
+		}
+		if i := bytes.IndexByte(data, '\n'); i >= 0 {
+			if skipOverRemainderOfLongLine {
+				skipOverRemainderOfLongLine = false
+				return i + 1, nil, nil
+			}
+			return i + 1, dropCR(data[0:i]), nil
+		}
+		if atEOF {
+			if skipOverRemainderOfLongLine {
+				return len(data), nil, nil
+			}
+
+			return len(data), dropCR(data), nil
+		}
+
+		// Buffer is full, so we can't get more data
+		if len(data) >= maxBufferSize {
+			if skipOverRemainderOfLongLine {
+				return len(data), nil, nil
+			}
+
+			skipOverRemainderOfLongLine = true
+			return len(data), data, nil
+		}
+
+		// Request more data.
+		return 0, nil, nil
+	}
+}
diff --git a/pkg/utils/lines_test.go b/pkg/utils/lines_test.go
index e7171022b..2192a3780 100644
--- a/pkg/utils/lines_test.go
+++ b/pkg/utils/lines_test.go
@@ -1,6 +1,8 @@
 package utils
 
 import (
+	"bufio"
+	"strings"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
@@ -100,3 +102,65 @@ func TestNormalizeLinefeeds(t *testing.T) {
 		assert.EqualValues(t, string(s.expected), NormalizeLinefeeds(string(s.byteArray)))
 	}
 }
+
+func TestScanLinesAndTruncateWhenLongerThanBuffer(t *testing.T) {
+	type scenario struct {
+		input         string
+		expectedLines []string
+	}
+
+	scenarios := []scenario{
+		{
+			"",
+			[]string{},
+		},
+		{
+			"\n",
+			[]string{""},
+		},
+		{
+			"abc",
+			[]string{"abc"},
+		},
+		{
+			"abc\ndef",
+			[]string{"abc", "def"},
+		},
+		{
+			"abc\n\ndef",
+			[]string{"abc", "", "def"},
+		},
+		{
+			"abc\r\ndef\r",
+			[]string{"abc", "def"},
+		},
+		{
+			"abcdef",
+			[]string{"abcde"},
+		},
+		{
+			"abcdef\n",
+			[]string{"abcde"},
+		},
+		{
+			"abcdef\nghijkl\nx",
+			[]string{"abcde", "ghijk", "x"},
+		},
+		{
+			"abc\ndefghijklmnopqrstuvw\nx",
+			[]string{"abc", "defgh", "x"},
+		},
+	}
+
+	for _, s := range scenarios {
+		scanner := bufio.NewScanner(strings.NewReader(s.input))
+		scanner.Buffer(make([]byte, 5), 5)
+		scanner.Split(ScanLinesAndTruncateWhenLongerThanBuffer(5))
+		result := []string{}
+		for scanner.Scan() {
+			result = append(result, scanner.Text())
+		}
+		assert.NoError(t, scanner.Err())
+		assert.EqualValues(t, s.expectedLines, result)
+	}
+}