mirror of
https://github.com/jesseduffield/lazygit.git
synced 2025-05-14 05:14:45 +02:00
The main change here is to bump tcell to v2.7.1, which should fix problems with multibyte characters on Windows.
331 lines
9.7 KiB
Go
331 lines
9.7 KiB
Go
package uniseg
|
|
|
|
import "unicode/utf8"
|
|
|
|
// Graphemes implements an iterator over Unicode grapheme clusters, or
// user-perceived characters. While iterating, it also provides information
// about word boundaries, sentence boundaries, line breaks, and monospace
// character widths.
//
// After constructing the iterator via [NewGraphemes] for a given string "str",
// [Graphemes.Next] is called for every grapheme cluster in a loop until it
// returns false. Inside the loop, information about the grapheme cluster as
// well as boundary information and character width is available via the various
// methods.
//
// This type basically wraps the [StepString] parser and provides a convenient
// interface to it. If you are only interested in some parts of this package's
// functionality, using the specialized functions starting with "First" is
// almost always faster.
type Graphemes struct {
	// The original string.
	original string

	// The remaining string to be parsed.
	remaining string

	// The current grapheme cluster.
	cluster string

	// The byte offset of the current grapheme cluster relative to the original
	// string.
	offset int

	// The current boundary information of the [Step] parser.
	boundaries int

	// The current state of the [Step] parser. Two negative sentinels are used
	// by the iterator itself: -1 means [Graphemes.Next] has not been called
	// yet, -2 means the iterator has moved past the last cluster.
	state int
}
|
|
|
|
// NewGraphemes returns a new grapheme cluster iterator.
|
|
func NewGraphemes(str string) *Graphemes {
|
|
return &Graphemes{
|
|
original: str,
|
|
remaining: str,
|
|
state: -1,
|
|
}
|
|
}
|
|
|
|
// Next advances the iterator by one grapheme cluster and returns false if no
|
|
// clusters are left. This function must be called before the first cluster is
|
|
// accessed.
|
|
func (g *Graphemes) Next() bool {
|
|
if len(g.remaining) == 0 {
|
|
// We're already past the end.
|
|
g.state = -2
|
|
g.cluster = ""
|
|
return false
|
|
}
|
|
g.offset += len(g.cluster)
|
|
g.cluster, g.remaining, g.boundaries, g.state = StepString(g.remaining, g.state)
|
|
return true
|
|
}
|
|
|
|
// Runes returns a slice of runes (code points) which corresponds to the current
|
|
// grapheme cluster. If the iterator is already past the end or [Graphemes.Next]
|
|
// has not yet been called, nil is returned.
|
|
func (g *Graphemes) Runes() []rune {
|
|
if g.state < 0 {
|
|
return nil
|
|
}
|
|
return []rune(g.cluster)
|
|
}
|
|
|
|
// Str returns a substring of the original string which corresponds to the
|
|
// current grapheme cluster. If the iterator is already past the end or
|
|
// [Graphemes.Next] has not yet been called, an empty string is returned.
|
|
func (g *Graphemes) Str() string {
|
|
return g.cluster
|
|
}
|
|
|
|
// Bytes returns a byte slice which corresponds to the current grapheme cluster.
|
|
// If the iterator is already past the end or [Graphemes.Next] has not yet been
|
|
// called, nil is returned.
|
|
func (g *Graphemes) Bytes() []byte {
|
|
if g.state < 0 {
|
|
return nil
|
|
}
|
|
return []byte(g.cluster)
|
|
}
|
|
|
|
// Positions returns the interval of the current grapheme cluster as byte
|
|
// positions into the original string. The first returned value "from" indexes
|
|
// the first byte and the second returned value "to" indexes the first byte that
|
|
// is not included anymore, i.e. str[from:to] is the current grapheme cluster of
|
|
// the original string "str". If [Graphemes.Next] has not yet been called, both
|
|
// values are 0. If the iterator is already past the end, both values are 1.
|
|
func (g *Graphemes) Positions() (int, int) {
|
|
if g.state == -1 {
|
|
return 0, 0
|
|
} else if g.state == -2 {
|
|
return 1, 1
|
|
}
|
|
return g.offset, g.offset + len(g.cluster)
|
|
}
|
|
|
|
// IsWordBoundary returns true if a word ends after the current grapheme
|
|
// cluster.
|
|
func (g *Graphemes) IsWordBoundary() bool {
|
|
if g.state < 0 {
|
|
return true
|
|
}
|
|
return g.boundaries&MaskWord != 0
|
|
}
|
|
|
|
// IsSentenceBoundary returns true if a sentence ends after the current
|
|
// grapheme cluster.
|
|
func (g *Graphemes) IsSentenceBoundary() bool {
|
|
if g.state < 0 {
|
|
return true
|
|
}
|
|
return g.boundaries&MaskSentence != 0
|
|
}
|
|
|
|
// LineBreak returns whether the line can be broken after the current grapheme
|
|
// cluster. A value of [LineDontBreak] means the line may not be broken, a value
|
|
// of [LineMustBreak] means the line must be broken, and a value of
|
|
// [LineCanBreak] means the line may or may not be broken.
|
|
func (g *Graphemes) LineBreak() int {
|
|
if g.state == -1 {
|
|
return LineDontBreak
|
|
}
|
|
if g.state == -2 {
|
|
return LineMustBreak
|
|
}
|
|
return g.boundaries & MaskLine
|
|
}
|
|
|
|
// Width returns the monospace width of the current grapheme cluster.
|
|
func (g *Graphemes) Width() int {
|
|
if g.state < 0 {
|
|
return 0
|
|
}
|
|
return g.boundaries >> ShiftWidth
|
|
}
|
|
|
|
// Reset puts the iterator into its initial state such that the next call to
|
|
// [Graphemes.Next] sets it to the first grapheme cluster again.
|
|
func (g *Graphemes) Reset() {
|
|
g.state = -1
|
|
g.offset = 0
|
|
g.cluster = ""
|
|
g.remaining = g.original
|
|
}
|
|
|
|
// GraphemeClusterCount returns the number of user-perceived characters
|
|
// (grapheme clusters) for the given string.
|
|
func GraphemeClusterCount(s string) (n int) {
|
|
state := -1
|
|
for len(s) > 0 {
|
|
_, s, _, state = FirstGraphemeClusterInString(s, state)
|
|
n++
|
|
}
|
|
return
|
|
}
|
|
|
|
// ReverseString reverses the given string while observing grapheme cluster
|
|
// boundaries.
|
|
func ReverseString(s string) string {
|
|
str := []byte(s)
|
|
reversed := make([]byte, len(str))
|
|
state := -1
|
|
index := len(str)
|
|
for len(str) > 0 {
|
|
var cluster []byte
|
|
cluster, str, _, state = FirstGraphemeCluster(str, state)
|
|
index -= len(cluster)
|
|
copy(reversed[index:], cluster)
|
|
if index <= len(str)/2 {
|
|
break
|
|
}
|
|
}
|
|
return string(reversed)
|
|
}
|
|
|
|
// The number of bits the grapheme property must be shifted to make place for
// grapheme states.
const shiftGraphemePropState = 4
|
|
|
|
// FirstGraphemeCluster returns the first grapheme cluster found in the given
|
|
// byte slice according to the rules of [Unicode Standard Annex #29, Grapheme
|
|
// Cluster Boundaries]. This function can be called continuously to extract all
|
|
// grapheme clusters from a byte slice, as illustrated in the example below.
|
|
//
|
|
// If you don't know the current state, for example when calling the function
|
|
// for the first time, you must pass -1. For consecutive calls, pass the state
|
|
// and rest slice returned by the previous call.
|
|
//
|
|
// The "rest" slice is the sub-slice of the original byte slice "b" starting
|
|
// after the last byte of the identified grapheme cluster. If the length of the
|
|
// "rest" slice is 0, the entire byte slice "b" has been processed. The
|
|
// "cluster" byte slice is the sub-slice of the input slice containing the
|
|
// identified grapheme cluster.
|
|
//
|
|
// The returned width is the width of the grapheme cluster for most monospace
|
|
// fonts where a value of 1 represents one character cell.
|
|
//
|
|
// Given an empty byte slice "b", the function returns nil values.
|
|
//
|
|
// While slightly less convenient than using the Graphemes class, this function
|
|
// has much better performance and makes no allocations. It lends itself well to
|
|
// large byte slices.
|
|
//
|
|
// [Unicode Standard Annex #29, Grapheme Cluster Boundaries]: http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries
|
|
func FirstGraphemeCluster(b []byte, state int) (cluster, rest []byte, width, newState int) {
|
|
// An empty byte slice returns nothing.
|
|
if len(b) == 0 {
|
|
return
|
|
}
|
|
|
|
// Extract the first rune.
|
|
r, length := utf8.DecodeRune(b)
|
|
if len(b) <= length { // If we're already past the end, there is nothing else to parse.
|
|
var prop int
|
|
if state < 0 {
|
|
prop = propertyGraphemes(r)
|
|
} else {
|
|
prop = state >> shiftGraphemePropState
|
|
}
|
|
return b, nil, runeWidth(r, prop), grAny | (prop << shiftGraphemePropState)
|
|
}
|
|
|
|
// If we don't know the state, determine it now.
|
|
var firstProp int
|
|
if state < 0 {
|
|
state, firstProp, _ = transitionGraphemeState(state, r)
|
|
} else {
|
|
firstProp = state >> shiftGraphemePropState
|
|
}
|
|
width += runeWidth(r, firstProp)
|
|
|
|
// Transition until we find a boundary.
|
|
for {
|
|
var (
|
|
prop int
|
|
boundary bool
|
|
)
|
|
|
|
r, l := utf8.DecodeRune(b[length:])
|
|
state, prop, boundary = transitionGraphemeState(state&maskGraphemeState, r)
|
|
|
|
if boundary {
|
|
return b[:length], b[length:], width, state | (prop << shiftGraphemePropState)
|
|
}
|
|
|
|
if firstProp == prExtendedPictographic {
|
|
if r == vs15 {
|
|
width = 1
|
|
} else if r == vs16 {
|
|
width = 2
|
|
}
|
|
} else if firstProp != prRegionalIndicator && firstProp != prL {
|
|
width += runeWidth(r, prop)
|
|
}
|
|
|
|
length += l
|
|
if len(b) <= length {
|
|
return b, nil, width, grAny | (prop << shiftGraphemePropState)
|
|
}
|
|
}
|
|
}
|
|
|
|
// FirstGraphemeClusterInString is like [FirstGraphemeCluster] but its input and
|
|
// outputs are strings.
|
|
func FirstGraphemeClusterInString(str string, state int) (cluster, rest string, width, newState int) {
|
|
// An empty string returns nothing.
|
|
if len(str) == 0 {
|
|
return
|
|
}
|
|
|
|
// Extract the first rune.
|
|
r, length := utf8.DecodeRuneInString(str)
|
|
if len(str) <= length { // If we're already past the end, there is nothing else to parse.
|
|
var prop int
|
|
if state < 0 {
|
|
prop = propertyGraphemes(r)
|
|
} else {
|
|
prop = state >> shiftGraphemePropState
|
|
}
|
|
return str, "", runeWidth(r, prop), grAny | (prop << shiftGraphemePropState)
|
|
}
|
|
|
|
// If we don't know the state, determine it now.
|
|
var firstProp int
|
|
if state < 0 {
|
|
state, firstProp, _ = transitionGraphemeState(state, r)
|
|
} else {
|
|
firstProp = state >> shiftGraphemePropState
|
|
}
|
|
width += runeWidth(r, firstProp)
|
|
|
|
// Transition until we find a boundary.
|
|
for {
|
|
var (
|
|
prop int
|
|
boundary bool
|
|
)
|
|
|
|
r, l := utf8.DecodeRuneInString(str[length:])
|
|
state, prop, boundary = transitionGraphemeState(state&maskGraphemeState, r)
|
|
|
|
if boundary {
|
|
return str[:length], str[length:], width, state | (prop << shiftGraphemePropState)
|
|
}
|
|
|
|
if firstProp == prExtendedPictographic {
|
|
if r == vs15 {
|
|
width = 1
|
|
} else if r == vs16 {
|
|
width = 2
|
|
}
|
|
} else if firstProp != prRegionalIndicator && firstProp != prL {
|
|
width += runeWidth(r, prop)
|
|
}
|
|
|
|
length += l
|
|
if len(str) <= length {
|
|
return str, "", width, grAny | (prop << shiftGraphemePropState)
|
|
}
|
|
}
|
|
}
|