diff --git a/ansi/slice.go b/ansi/slice.go
new file mode 100644
index 00000000..232dc41f
--- /dev/null
+++ b/ansi/slice.go
@@ -0,0 +1,111 @@
+package ansi
+
+import (
+	"bytes"
+
+	"github.com/charmbracelet/x/ansi/parser"
+	"github.com/rivo/uniseg"
+)
+
+// Slice returns the part of s that occupies the cells in the half-open range
+// [start, end), where cell 0 is the first printable cell of s. It is aware of
+// ANSI escape codes and will not break them, and accounts for wide characters
+// (such as East Asian characters and emojis).
+//
+// Bytes that are not printable text (escape sequences, control characters)
+// are always copied to the result, even when they lie outside the range.
+//
+// If the range cuts through the middle of a wide character, the cells of that
+// character that fall inside the range are filled with spaces. If s ends
+// before end, the result is padded with trailing spaces. Both are done so
+// that the printable width of the result is end-start.
+//
+// Slice returns an empty string when s is empty or end <= start.
+func Slice(s string, start int, end int) string {
+	if end <= start || s == "" {
+		return ""
+	}
+
+	var cluster []byte
+	var buf bytes.Buffer
+	curPos := 0
+	pstate := parser.GroundState // initial state
+	b := []byte(s)
+
+	// pad writes n spaces to buf; it does nothing when n <= 0.
+	pad := func(n int) {
+		for ; n > 0; n-- {
+			buf.WriteByte(' ')
+		}
+	}
+
+	// Here we iterate over the bytes of the string and keep track of the scan
+	// position in cells. Printable characters are kept only while the scan
+	// position is inside [start, end); everything else is always collected.
+	for i := 0; i < len(b); i++ {
+		state, action := parser.Table.Transition(pstate, b[i])
+
+		switch action {
+		case parser.PrintAction:
+			// Single-byte character, fast path: it advances the scan
+			// position by exactly one cell.
+			if utf8ByteLen(b[i]) <= 1 {
+				if curPos >= start && curPos < end {
+					buf.WriteByte(b[i])
+				}
+				curPos++
+				continue
+			}
+
+			// This action happens when we transition to the Utf8State.
+			var width int
+			cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
+			oldPos := curPos
+			curPos += width
+
+			// When reading multiple bytes, we need to advance i further.
+			// We subtract one, because the loop adds that one by default.
+			i += len(cluster) - 1
+
+			// The cluster occupies the cells [oldPos, curPos).
+			switch {
+			case curPos <= start:
+				// Before scope, skip
+			case oldPos < start:
+				// Cut off at beginning, write begin padding
+				pad(curPos - start)
+			case curPos <= end:
+				// Fits inside perfectly, write
+				buf.Write(cluster)
+			case oldPos < end:
+				// Cut off at end, write end padding
+				pad(end - oldPos)
+			}
+
+			// The whole cluster was consumed above, so go back to the ground
+			// state on every path, including clusters beyond scope.
+			pstate = parser.GroundState
+			continue
+
+		// Always collect ansi codes
+		default:
+			buf.WriteByte(b[i])
+		}
+
+		// Transition to the next state.
+		pstate = state
+	}
+
+	// Ensure width matches requested: cells from max(curPos, start) up to end
+	// were never reached by the input, so fill them with spaces.
+	if curPos < end {
+		from := curPos
+		if from < start {
+			from = start
+		}
+		pad(end - from)
+	}
+
+	// Return sliced string
+	return buf.String()
+}
diff --git a/ansi/slice_test.go b/ansi/slice_test.go
new file mode 100644
index 00000000..6c2d4691
--- /dev/null
+++ b/ansi/slice_test.go
@@ -0,0 +1,69 @@
+package ansi
+
+import (
+	"testing"
+)
+
+// TestSlice checks both the exact output of Slice and that the printable
+// width of the output equals the requested width (end - start).
+func TestSlice(t *testing.T) {
+	cases := []struct {
+		name   string
+		input  string
+		start  int
+		end    int
+		expect string
+	}{
+		{"empty", "", 0, 0, ""},
+		{"simple", "foobar", 0, 3, "foo"},
+		{"passthrough", "foobar", 0, 6, "foobar"},
+		{"ascii", "hello", 0, 3, "hel"},
+		{"emoji", "👋", 0, 2, "👋"},
+		{"wideemoji", "🫧", 0, 2, "🫧"},
+		{"controlemoji", "\x1b[31mhello 👋abc\x1b[0m", 0, 8, "\x1b[31mhello 👋\x1b[0m"},
+		{"osc8", "\x1b]8;;https://charm.sh\x1b\\Charmbracelet 🫧\x1b]8;;\x1b\\", 0, 5, "\x1b]8;;https://charm.sh\x1b\\Charm\x1b]8;;\x1b\\"},
+		{"osc8_8bit", "\x9d8;;https://charm.sh\x9cCharmbracelet 🫧\x9d8;;\x9c", 0, 5, "\x9d8;;https://charm.sh\x9cCharm\x9d8;;\x9c"},
+		{"noop", "\x1B[7m--", 0, 2, "\x1B[7m--"},
+		{"double_width", "\x1B[38;2;249;38;114m你好\x1B[0m", 0, 3, "\x1B[38;2;249;38;114m你 \x1B[0m"},
+		{"double_width_rune", "你", 0, 1, " "},
+		{"double_width_runes", "你好", 0, 2, "你"},
+		{"spaces_only", "    ", 0, 2, "  "},
+		{"same_width", "foo", 0, 3, "foo"},
+		{"style", "I really \x1B[38;2;249;38;114mlove\x1B[0m Go!", 0, 8, "I really\x1B[38;2;249;38;114m\x1B[0m"},
+		{"unicode", "\x1b[35mClaire‘s Boutique\x1b[0m", 0, 8, "\x1b[35mClaire‘s\x1b[0m"},
+		{"wide_chars", "こんにちは", 0, 7, "こんに "},
+		{"style_wide_chars", "\x1b[35mこんにちは\x1b[m", 0, 7, "\x1b[35mこんに \x1b[m"},
+		{"osc8_lf", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\\nสวัสดีสวัสดี\x1b]8;;\x1b\\", 0, 9, "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\\nส\x1b]8;;\x1b\\"},
+		{"beginning_whitespace", "👋🤭🥳😊👌", 1, 6, " 🤭🥳"},
+		{"ending_whitespace", "👋🤭🥳😊👌", 4, 9, "🥳😊 "},
+		{"double_whitespace", "👋🤭🥳😊👌", 1, 9, " 🤭🥳😊 "},
+		{"width_match", "abc", 0, 5, "abc  "},
+		{"width_match_offset", "abc", 1, 5, "bc  "},
+		{"start_beyond_input", "abc", 4, 6, "  "},
+		{"wide_beyond_end", "ab你\x1b[0m", 0, 2, "ab\x1b[0m"},
+	}
+
+	for i, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			result := Slice(c.input, c.start, c.end)
+			if result != c.expect {
+				t.Errorf("test case %d failed: expected %q, got %q", i+1, c.expect, result)
+			}
+			wantWidth := c.end - c.start
+			gotWidth := StringWidth(result)
+			if wantWidth != gotWidth {
+				t.Errorf("test case %d failed: length does not match, expected %d, got %d", i+1, wantWidth, gotWidth)
+			}
+		})
+	}
+}
+
+// BenchmarkSliceString measures Slice on a short ASCII input, in parallel.
+func BenchmarkSliceString(b *testing.B) {
+	b.ReportAllocs()
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			Slice("foo", 1, 2)
+		}
+	})
+}
diff --git a/go.work.sum b/go.work.sum
index d2d5110b..62fe5ed5 100644
--- a/go.work.sum
+++ b/go.work.sum
@@ -28,6 +28,7 @@ golang.org/x/term v0.16.0 h1:m+B6fahuftsE9qjo0VWp2FW0mB3MTJvR0BaMQrq0pmE=
 golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY=
 golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk=
 golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0=
+golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
 golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
 golang.org/x/tools v0.1.12 h1:VveCTK38A2rkS8ZqFY25HIDFscX5X9OoEhJd3quQmXU=
 golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=