Skip to content

Commit

Permalink
feat: added ansi slice function
Browse files Browse the repository at this point in the history
Based on the truncate function.
Specify a start point (inclusive) and end point (exclusive) in cell-width units, and get a string back at that exact size.
Handles ANSI escape codes and characters that are more than one cell wide.
  • Loading branch information
sukus21 authored and aymanbagabas committed Jun 24, 2024
1 parent e9f9f85 commit bdd314f
Show file tree
Hide file tree
Showing 3 changed files with 176 additions and 0 deletions.
111 changes: 111 additions & 0 deletions ansi/slice.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
package ansi

import (
"bytes"

"github.com/charmbracelet/x/ansi/parser"
"github.com/rivo/uniseg"
)

// Slice returns the part of s that occupies the terminal cells from start
// (inclusive) to end (exclusive), both measured in cell-width units.
//
// Bytes that the ANSI parser does not report as printable (escape sequences,
// control codes) are never cut: every one of them is copied to the result,
// including those that lie outside the requested range. Multi-byte characters
// are measured per grapheme cluster, so wide characters (such as East Asian
// characters and emojis) are accounted for.
//
// If the range cuts through the middle of a wide character, the part of that
// character inside the range is replaced by spaces. If the text ends before
// end, the result is padded with trailing spaces. Both are done so that the
// result is end-start cells wide.
//
// A negative start is treated as 0. Slice returns "" when s is empty or when
// the range is empty (end <= start).
func Slice(s string, start int, end int) string {
	if start < 0 {
		start = 0
	}
	if end <= start || s == "" {
		return ""
	}

	var cluster []byte
	var buf bytes.Buffer
	curPos := 0                  // cell position of the next printable character
	pstate := parser.GroundState // initial state
	b := []byte(s)

	// pad appends n spaces to the output; n <= 0 writes nothing.
	pad := func(n int) {
		for ; n > 0; n-- {
			buf.WriteByte(' ')
		}
	}

	// Iterate over the bytes of the string, keeping track of the scan position
	// in cells. Printable characters are written only while the position is
	// inside [start, end); everything else is always written.
	for i := 0; i < len(b); i++ {
		state, action := parser.Table.Transition(pstate, b[i])

		switch action {
		case parser.PrintAction:
			// Fast path: a byte that does not start a multi-byte sequence is
			// counted as exactly one cell.
			if utf8ByteLen(b[i]) <= 1 {
				if curPos >= start && curPos < end {
					buf.WriteByte(b[i])
				}
				curPos++
				continue
			}

			// Multi-byte sequence: take the whole grapheme cluster at once and
			// measure its width in cells.
			var width int
			cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)

			// The cluster is consumed here rather than by the parser, so the
			// parser state goes back to ground.
			pstate = parser.GroundState
			oldPos := curPos
			curPos += width

			// Skip the rest of the cluster. We subtract one because the loop
			// adds that one by default.
			i += len(cluster) - 1

			switch {
			case curPos <= start:
				// Entirely before the range: skip.
			case oldPos < start:
				// Cut off at the beginning: pad the cells that fall inside
				// the range, never going past end.
				visibleEnd := curPos
				if visibleEnd > end {
					visibleEnd = end
				}
				pad(visibleEnd - start)
			case curPos <= end:
				// Fits inside the range entirely.
				buf.Write(cluster)
			case oldPos < end:
				// Cut off at the end: pad the cells from oldPos up to end.
				pad(end - oldPos)
			}

			// Every cluster path continues, including clusters beyond the
			// range, so that the ground state set above is not overwritten by
			// the transition result below.
			continue

		default:
			// Always collect ANSI codes and other non-printable bytes.
			buf.WriteByte(b[i])
		}

		// Transition to the next state.
		pstate = state
	}

	// The input ended before the end column: pad the remaining cells. When
	// the input ended before start, nothing was written and the whole range
	// is padding.
	if curPos < end {
		from := curPos
		if from < start {
			from = start
		}
		pad(end - from)
	}

	return buf.String()
}
64 changes: 64 additions & 0 deletions ansi/slice_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package ansi

import (
"testing"
)

// TestSlice runs Slice over a table of inputs and checks two things for each
// case: the result equals the expected string, and the cell width of the
// result (as reported by StringWidth) equals end-start. Because both checks
// apply to every case, each expected string must itself be end-start cells
// wide.
func TestSlice(t *testing.T) {
	cases := []struct {
		name   string
		input  string
		start  int
		end    int
		expect string
	}{
		{"empty", "", 0, 0, ""},
		{"simple", "foobar", 0, 3, "foo"},
		{"passthrough", "foobar", 0, 6, "foobar"},
		{"ascii", "hello", 0, 3, "hel"},
		{"offset_ascii", "foobar", 2, 4, "ob"},
		{"emoji", "👋", 0, 2, "👋"},
		{"wideemoji", "🫧", 0, 2, "🫧"},
		{"controlemoji", "\x1b[31mhello 👋abc\x1b[0m", 0, 8, "\x1b[31mhello 👋\x1b[0m"},
		{"osc8", "\x1b]8;;https://charm.sh\x1b\\Charmbracelet 🫧\x1b]8;;\x1b\\", 0, 5, "\x1b]8;;https://charm.sh\x1b\\Charm\x1b]8;;\x1b\\"},
		{"osc8_8bit", "\x9d8;;https://charm.sh\x9cCharmbracelet 🫧\x9d8;;\x9c", 0, 5, "\x9d8;;https://charm.sh\x9cCharm\x9d8;;\x9c"},
		{"noop", "\x1B[7m--", 0, 2, "\x1B[7m--"},
		{"double_width", "\x1B[38;2;249;38;114m你好\x1B[0m", 0, 3, "\x1B[38;2;249;38;114m你 \x1B[0m"},
		{"double_width_rune", "你", 0, 1, " "},
		{"double_width_runes", "你好", 0, 2, "你"},
		// A 2-cell slice of whitespace must come back as two spaces.
		{"spaces_only", " ", 0, 2, "  "},
		{"same_width", "foo", 0, 3, "foo"},
		{"style", "I really \x1B[38;2;249;38;114mlove\x1B[0m Go!", 0, 8, "I really\x1B[38;2;249;38;114m\x1B[0m"},
		{"unicode", "\x1b[35mClaire‘s Boutique\x1b[0m", 0, 8, "\x1b[35mClaire‘s\x1b[0m"},
		{"wide_chars", "こんにちは", 0, 7, "こんに "},
		{"style_wide_chars", "\x1b[35mこんにちは\x1b[m", 0, 7, "\x1b[35mこんに \x1b[m"},
		{"osc8_lf", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\\nสวัสดีสวัสดี\x1b]8;;\x1b\\", 0, 9, "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\\n\x1b]8;;\x1b\\"},
		{"beginning_whitespace", "👋🤭🥳😊👌", 1, 6, " 🤭🥳"},
		{"ending_whitespace", "👋🤭🥳😊👌", 4, 9, "🥳😊 "},
		{"double_whitespace", "👋🤭🥳😊👌", 1, 9, " 🤭🥳😊 "},
		// Input is 3 cells wide, so a 5-cell slice is padded with two spaces.
		{"width_match", "abc", 0, 5, "abc  "},
	}

	for i, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			result := Slice(c.input, c.start, c.end)
			if result != c.expect {
				t.Errorf("test case %d failed: expected %q, got %q", i+1, c.expect, result)
			}
			// The slice must be exactly as wide as the requested range.
			originalLen := c.end - c.start
			resultLen := StringWidth(result)
			if originalLen != resultLen {
				t.Errorf("test case %d failed: length does not match, expected %d, got %d", i+1, originalLen, resultLen)
			}
		})
	}
}

// BenchmarkSliceString measures Slice on a short ASCII string, running the
// calls in parallel across goroutines and reporting allocations.
func BenchmarkSliceString(b *testing.B) {
	// Benchmark-wide settings are applied once, before the parallel section:
	// the RunParallel body runs in every worker goroutine and must not call
	// ResetTimer, which has global effect.
	b.ReportAllocs()
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			Slice("foo", 1, 2)
		}
	})
}
1 change: 1 addition & 0 deletions go.work.sum
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ golang.org/x/term v0.16.0 h1:m+B6fahuftsE9qjo0VWp2FW0mB3MTJvR0BaMQrq0pmE=
golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY=
golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk=
golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0=
golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/tools v0.1.12 h1:VveCTK38A2rkS8ZqFY25HIDFscX5X9OoEhJd3quQmXU=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
Expand Down

0 comments on commit bdd314f

Please sign in to comment.