From 5b2270187a15a984b25a178a0858cc8228347fcd Mon Sep 17 00:00:00 2001 From: Ayman Bagabas Date: Mon, 26 Feb 2024 12:45:29 -0500 Subject: [PATCH] feat(term/ansi): implement csi, osc, and params sequence parsing --- exp/term/ansi/csi.go | 153 +++++++++++++++++++++++++++++++++++ exp/term/ansi/csi_test.go | 89 ++++++++++++++++++++ exp/term/ansi/osc.go | 141 ++++++++++++++++++++++++++++++++ exp/term/ansi/osc_test.go | 111 +++++++++++++++++++++++++ exp/term/ansi/params.go | 45 +++++++++++ exp/term/ansi/params_test.go | 27 +++++++ 6 files changed, 566 insertions(+) create mode 100644 exp/term/ansi/csi.go create mode 100644 exp/term/ansi/csi_test.go create mode 100644 exp/term/ansi/osc.go create mode 100644 exp/term/ansi/osc_test.go create mode 100644 exp/term/ansi/params.go create mode 100644 exp/term/ansi/params_test.go diff --git a/exp/term/ansi/csi.go b/exp/term/ansi/csi.go new file mode 100644 index 00000000..e3b43884 --- /dev/null +++ b/exp/term/ansi/csi.go @@ -0,0 +1,153 @@ +package ansi + +import ( + "strings" +) + +// CsiSequence represents a control sequence introducer (CSI) sequence. +// +// The sequence starts with a CSI sequence, CSI (0x9B) in an 8-bit environment +// or ESC [ (0x1B 0x5B) in a 7-bit environment, followed by any number of +// parameters in the range of 0x30-0x3F, then by any number of intermediate +// bytes in the range of 0x20-0x2F, then finally with a single final byte in the +// range of 0x40-0x7E. +// +// CSI P..P I..I F +// +// See ECMA-48 § 5.4. +type CsiSequence string + +// IsValid reports whether the control sequence is valid. +func (c CsiSequence) IsValid() bool { + if len(c) == 0 { + return false + } + + var i int + if c[0] == CSI { + i++ + } else if len(c) > 1 && c[0] == ESC && c[1] == '[' { + i += 2 + } else { + return false + } + + // Parameters in the range 0x30-0x3F. + for ; i < len(c) && c[i] >= 0x30 && c[i] <= 0x3F; i++ { // nolint: revive + } + + // Intermediate bytes in the range 0x20-0x2F. 
+ for ; i < len(c) && c[i] >= 0x20 && c[i] <= 0x2F; i++ { // nolint: revive + } + + // Final byte in the range 0x40-0x7E. + return i < len(c) && c[i] >= 0x40 && c[i] <= 0x7E +} + +// HasInitial reports whether the control sequence has an initial byte. +// This indicates a private sequence. +func (c CsiSequence) HasInitial() bool { + i := c.Initial() + return i != 0 +} + +// Initial returns the initial byte of the control sequence. +func (c CsiSequence) Initial() byte { + if len(c) == 0 { + return 0 + } + + i := strings.IndexFunc(string(c), func(r rune) bool { + return r >= 0x3C && r <= 0x3F + }) + if i == -1 { + return 0 + } + + return c[i] +} + +// Params returns the parameters of the control sequence. +func (c CsiSequence) Params() []byte { + if len(c) == 0 { + return []byte{} + } + + start := strings.IndexFunc(string(c), func(r rune) bool { + return r >= 0x30 && r <= 0x3F + }) + if start == -1 { + return []byte{} + } + + end := strings.IndexFunc(string(c[start:]), func(r rune) bool { + return r < 0x30 || r > 0x3F + }) + if end == -1 { + return []byte{} + } + + return []byte(c[start : start+end]) +} + +// Intermediates returns the intermediate bytes of the control sequence. +func (c CsiSequence) Intermediates() []byte { + if len(c) == 0 { + return []byte{} + } + + start := strings.IndexFunc(string(c), func(r rune) bool { + return r >= 0x20 && r <= 0x2F + }) + if start == -1 { + return []byte{} + } + + end := strings.IndexFunc(string(c[start:]), func(r rune) bool { + return r < 0x20 || r > 0x2F + }) + if end == -1 { + return []byte{} + } + + return []byte(c[start : start+end]) +} + +// Command returns the command byte of the control sequence. +// A CSI command byte is in the range of 0x40-0x7E. 
This includes ASCII +// - @ +// - A-Z +// - [ \ ] +// - ^ _ ` +// - a-z +// - { | } +// - ~ +func (c CsiSequence) Command() byte { + i := strings.LastIndexFunc(string(c), func(r rune) bool { + return r >= 0x40 && r <= 0x7E + }) + if i == -1 { + return 0 + } + + return c[i] +} + +// IsPrivate reports whether the control sequence is a private sequence. +// This means either the first parameter byte is in the range of 0x3C-0x3F or +// the command byte is in the range of 0x70-0x7E. +func (c CsiSequence) IsPrivate() bool { + if len(c) == 0 { + return false + } + + var i int + for i = 0; i < len(c); i++ { + if c[i] >= 0x30 && c[i] <= 0x3F { + break + } + } + + return (c[i] >= 0x3C && c[i] <= 0x3F) || + (c[len(c)-1] >= 0x70 && c[len(c)-1] <= 0x7E) +} diff --git a/exp/term/ansi/csi_test.go b/exp/term/ansi/csi_test.go new file mode 100644 index 00000000..71c35f27 --- /dev/null +++ b/exp/term/ansi/csi_test.go @@ -0,0 +1,89 @@ +package ansi + +import "testing" + +func TestCsiSequenceIsValid(t *testing.T) { + cases := []struct { + seq CsiSequence + valid bool + }{ + {CsiSequence(""), false}, + {CsiSequence("\x1b["), false}, + {CsiSequence("\x1b]"), false}, + {CsiSequence("\x9b"), false}, + {CsiSequence("\x1b[?1;2:1230"), false}, + {CsiSequence("\x1b[0A"), true}, + {CsiSequence("\x1b[A"), true}, + {CsiSequence("\x1b[ A"), true}, + {CsiSequence("\x1b[ #A"), true}, + {CsiSequence("\x1b[1 #A"), true}, + {CsiSequence("\x1b[1; #A"), true}, + {CsiSequence("\x1b[1;2 #A"), true}, + {CsiSequence("\x1b[1;2:3:4 #A"), true}, + {CsiSequence("\x1b[1;2:3:4: #["), true}, + {CsiSequence("\x1b[1;2;3;4;5;6;7;8;9A"), true}, + {CsiSequence("\x1b[?1;2A"), true}, + {CsiSequence("\x1b[?1;2:123A"), true}, + } + for _, c := range cases { + if got, want := c.seq.IsValid(), c.valid; got != want { + t.Errorf("got %t, want %t", got, want) + } + } +} + +func TestCsiSequenceParams(t *testing.T) { + cases := []struct { + seq CsiSequence + params string + }{ + {CsiSequence("\x1b[012;3"), ""}, + 
+ {CsiSequence("\x1b[A"), ""}, + {CsiSequence("\x1b[0A"), "0"}, + {CsiSequence("\x1b[1;2;3;4;5;6;7;8;9A"), "1;2;3;4;5;6;7;8;9"}, + {CsiSequence("\x1b[?1;2A"), "?1;2"}, + {CsiSequence("\x1b[?1;2:123A"), "?1;2:123"}, + } + for _, c := range cases { + if got, want := string(c.seq.Params()), c.params; got != want { + t.Errorf("got %q, want %q", got, want) + } + } +} + +func TestCsiSequenceIntermediates(t *testing.T) { + cases := []struct { + seq CsiSequence + intermediate string + }{ + {CsiSequence("\x1b[0A"), ""}, + {CsiSequence("\x1b[1;2;3;4;5;6;7;8;9A"), ""}, + {CsiSequence("\x1b[?1;2A"), ""}, + {CsiSequence("\x1b[?1;2:123A"), ""}, + {CsiSequence("\x1b[?1;2:123 A"), " "}, + {CsiSequence("\x1b[123 #!A"), " #!"}, + } + for _, c := range cases { + if got, want := string(c.seq.Intermediates()), c.intermediate; got != want { + t.Errorf("got %q, want %q", got, want) + } + } +} + +func TestCsiSequenceCommand(t *testing.T) { + cases := []struct { + seq CsiSequence + command byte + }{ + {CsiSequence(""), 0}, + {CsiSequence("\x1b[0A"), 'A'}, + {CsiSequence("\x1b[1;2;3;4;5;6;7;8;9A"), 'A'}, + {CsiSequence("\x1b[?1;2A"), 'A'}, + {CsiSequence("\x1b[?1;2:123A"), 'A'}, + } + for _, c := range cases { + if got, want := c.seq.Command(), c.command; got != want { + t.Errorf("got %q, want %q", got, want) + } + } +} diff --git a/exp/term/ansi/osc.go b/exp/term/ansi/osc.go new file mode 100644 index 00000000..ad00b907 --- /dev/null +++ b/exp/term/ansi/osc.go @@ -0,0 +1,141 @@ +package ansi + +import ( + "strings" + "unicode" +) + +// OscSequence represents an OSC sequence. +// +// The sequence starts with an OSC sequence, OSC (0x9D) in an 8-bit environment +// or ESC ] (0x1B 0x5D) in a 7-bit environment, followed by a positive integer identifier, +// then by arbitrary data terminated by an ST (0x9C) in an 8-bit environment, +// ESC \ (0x1B 0x5C) in a 7-bit environment, or BEL (0x07) for backwards compatibility. +// +// OSC Ps ; Pt ST +// OSC Ps ; Pt BEL +// +// See ECMA-48 § 5.7. 
+type OscSequence string + +// IsValid reports whether the control sequence is valid. +// We allow UTF-8 in the data. +func (o OscSequence) IsValid() bool { + if len(o) == 0 { + return false + } + + var i int + if o[0] == OSC { + i++ + } else if len(o) > 1 && o[0] == ESC && o[1] == ']' { + i += 2 + } else { + return false + } + + // Osc data + start := i + end := -1 + for ; i < len(o) && o[i] >= 0x20 && o[i] <= 0xFF && o[i] != ST && o[i] != BEL && o[i] != ESC; i++ { // nolint: revive + if end == -1 && o[i] == ';' { + end = i + } + } + if end == -1 { + end = i + } + + // Identifier must be all digits. + for j := start; j < end; j++ { + if !unicode.IsDigit(rune(o[j])) { + return false + } + } + + // Terminator is one of the following: + // - ST (0x9C) + // - ESC \ (0x1B 0x5C) + // - BEL (0x07) + return i < len(o) && + (o[i] == ST || o[i] == BEL || (i+1 < len(o) && o[i] == ESC && o[i+1] == '\\')) +} + +// Identifier returns the identifier of the control sequence. +func (o OscSequence) Identifier() string { + if len(o) == 0 { + return "" + } + + start := strings.IndexFunc(string(o), func(r rune) bool { + return r >= '0' && r <= '9' + }) + if start == -1 { + return "" + } + end := strings.Index(string(o), ";") + if end == -1 { + for i := len(o) - 1; i > start; i-- { + if o[i] == ST || o[i] == BEL || o[i] == ESC { + end = i + break + } + } + } + if end == -1 || start >= end { + return "" + } + + id := string(o[start:end]) + for _, r := range id { + if !unicode.IsDigit(r) { + return "" + } + } + + return id +} + +// Data returns the data of the control sequence. 
+func (o OscSequence) Data() string { + if len(o) == 0 { + return "" + } + + start := strings.Index(string(o), ";") + if start == -1 { + return "" + } + + end := -1 + for i := len(o) - 1; i > start; i-- { + if o[i] == ST || o[i] == BEL || o[i] == ESC { + end = i + break + } + } + if end == -1 || start >= end { + return "" + } + + return string(o[start+1 : end]) +} + +// Terminator returns the terminator of the control sequence. +func (o OscSequence) Terminator() string { + if len(o) == 0 { + return "" + } + + i := len(o) - 1 + for ; i > 0; i-- { + if o[i] == ST || o[i] == BEL || o[i] == ESC { + break + } + } + if i == -1 { + return "" + } + + return string(o[i:]) +} diff --git a/exp/term/ansi/osc_test.go b/exp/term/ansi/osc_test.go new file mode 100644 index 00000000..8fdc8be2 --- /dev/null +++ b/exp/term/ansi/osc_test.go @@ -0,0 +1,111 @@ +package ansi + +import "testing" + +func TestOscSequenceIsValid(t *testing.T) { + cases := []struct { + in string + expect bool + }{ + {"", false}, + {"\x1b]0", false}, + {"\x1b]0;", false}, + {"\x1b]0;hello", false}, + {"\x1b]1:hello\x07", false}, + {"\x1b]0;hello\x07", true}, + {"\x1b]0;hello\x1b\\", true}, + {"\x1b]1234;hello\x1b\\", true}, + {"\x1b]1234\x1b\\", true}, + {"\x1b]1234;abc;hello\x1b\\", true}, + {"\x9b1234;hello\x9c", false}, + {"\x9d]1234;hello\x9c", false}, + {"\x9d1234;hello\x9c", true}, + } + + for i, c := range cases { + seq := OscSequence(c.in) + if seq.IsValid() != c.expect { + t.Errorf("case %d: expected %v, got %v", i+1, c.expect, seq.IsValid()) + } + } +} + +func TestOscSequenceIdentifier(t *testing.T) { + cases := []struct { + in string + expect string + }{ + {"", ""}, + {"\x1b]0", ""}, + {"\x1b]0;", "0"}, + {"\x1b]0;hello", "0"}, + {"\x1b]1:hello\x07", ""}, + {"\x1b]0;hello\x07", "0"}, + {"\x1b]0\x07", "0"}, + {"\x1b]0;hello\x1b\\", "0"}, + {"\x1b]1234;hello\x1b\\", "1234"}, + {"\x1b]1234;abc;hello\x1b\\", "1234"}, + {"\x9b1234;hello\x9c", "1234"}, + {"\x9d]1234;hello\x9c", "1234"}, + 
{"\x9d1234\x9c", "1234"}, + {"\x9d1234;hello\x9c", "1234"}, + } + + for i, c := range cases { + seq := OscSequence(c.in) + if seq.Identifier() != c.expect { + t.Errorf("case %d: expected %q, got %q", i+1, c.expect, seq.Identifier()) + } + } +} + +func TestOscSequenceData(t *testing.T) { + cases := []struct { + in string + expect string + }{ + {"", ""}, + {"\x1b]0", ""}, + {"\x1b]0;", ""}, + {"\x1b]0;hello", ""}, + {"\x1b]1:hello\x07", ""}, + {"\x1b]0;hello\x07", "hello"}, + {"\x1b]0;hello\x1b\\", "hello"}, + {"\x1b]1234;hello\x1b\\", "hello"}, + {"\x1b]1234;abc;hello\x1b\\", "abc;hello"}, + {"\x9b1234;hello\x9c", "hello"}, + {"\x9d]1234;hello\x9c", "hello"}, + {"\x9d1234;hello\x9c", "hello"}, + } + + for i, c := range cases { + seq := OscSequence(c.in) + if seq.Data() != c.expect { + t.Errorf("case %d: expected %q, got %q", i+1, c.expect, seq.Data()) + } + } +} + +func TestOscSequenceTerminator(t *testing.T) { + cases := []struct { + in string + expect string + }{ + {"", ""}, + {"\x1b]1:hello\x07", "\x07"}, + {"\x1b]0;hello\x07", "\x07"}, + {"\x1b]0;hello\x1b\\", "\x1b\\"}, + {"\x1b]1234;hello\x1b\\", "\x1b\\"}, + {"\x1b]1234;abc;hello\x1b\\", "\x1b\\"}, + {"\x9b1234;hello\x9c", "\x9c"}, + {"\x9d]1234;hello\x9c", "\x9c"}, + {"\x9d1234;hello\x9c", "\x9c"}, + } + + for i, c := range cases { + seq := OscSequence(c.in) + if seq.Terminator() != c.expect { + t.Errorf("case %d: expected %q, got %q", i+1, c.expect, seq.Terminator()) + } + } +} diff --git a/exp/term/ansi/params.go b/exp/term/ansi/params.go new file mode 100644 index 00000000..a1bb4249 --- /dev/null +++ b/exp/term/ansi/params.go @@ -0,0 +1,45 @@ +package ansi + +import ( + "bytes" +) + +// Params parses and returns a list of control sequence parameters. +// +// Parameters are positive integers separated by semicolons. Empty parameters +// default to zero. Parameters can have sub-parameters separated by colons. +// +// Any non-parameter bytes are ignored. 
This includes bytes that are not in the +// range of 0x30-0x3B. +// +// See ECMA-48 § 5.4.1. +func Params(p []byte) [][]uint { + if len(p) == 0 { + return [][]uint{} + } + + // Trim leading and trailing non-parameter bytes, i.e. bytes outside 0x30-0x3B. + p = bytes.TrimFunc(p, func(r rune) bool { + return r < 0x30 || r > 0x3B + }) + + parts := bytes.Split(p, []byte{';'}) + params := make([][]uint, len(parts)) + for i, part := range parts { + sparts := bytes.Split(part, []byte{':'}) + params[i] = make([]uint, len(sparts)) + for j, spart := range sparts { + params[i][j] = bytesToUint16(spart) + } + } + + return params +} + +func bytesToUint16(b []byte) uint { + var n uint + for _, c := range b { + n = n*10 + uint(c-'0') + } + return n +} diff --git a/exp/term/ansi/params_test.go b/exp/term/ansi/params_test.go new file mode 100644 index 00000000..07fd4eec --- /dev/null +++ b/exp/term/ansi/params_test.go @@ -0,0 +1,27 @@ +package ansi + +import ( + "reflect" + "testing" +) + +func TestParamsParameters(t *testing.T) { + cases := []struct { + params string + want [][]uint + }{ + {"", [][]uint{}}, + {"0", [][]uint{{0}}}, + {"1;2", [][]uint{{1}, {2}}}, + {"1:2;3", [][]uint{{1, 2}, {3}}}, + {"1;2;q", [][]uint{{1}, {2}, {0}}}, + {"1;;2:255:255:0", [][]uint{{1}, {0}, {2, 255, 255, 0}}}, + {"1;2:::0", [][]uint{{1}, {2, 0, 0, 0}}}, + } + for i, c := range cases { + got := Params([]byte(c.params)) + if !reflect.DeepEqual(got, c.want) { + t.Errorf("case %d, got %v, want %v", i+1, got, c.want) + } + } +}