From 71938d67a5a4eb1b411f123fc4f74914181cbccb Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Thu, 12 Sep 2024 01:43:56 +0200 Subject: [PATCH] xkbcli how-to-type: Enhance arguments parsing & doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the positional parameter of the CLI is either a Unicode code point or a keysym. However, their respective formats are not documented. It turns out that there are multiple issues due to the use of `strtol`: - Code points can be parsed as octal, decimal and hexadecimal while keysyms can only be parsed as hexadecimal. Some programs output keysyms in their decimal form (e.g. `wev`) so it is worth bringing symmetry with code points. - Octal format is unusual for both and is triggered by leading zeros, which is unintuitive in this context. - `U+NNNN` format is the standard format for Unicode code points but is not supported. - Plain characters are not supported, e.g.: a, é, ß, Æ, γ, 🦆, etc., although this is probably the easiest format for most users. Fixed the issues above: - Allow the code point to be passed exactly in the following formats: - Literal character (requires UTF-8 character encoding of the terminal); - Decimal number; - Hexadecimal number: either `0xNNNN` or `U+NNNN`. - Allow the keysym to be passed exactly in the following formats: - Decimal number; - Hexadecimal number: `0xNNNN`; - Name. - Improve both `--help` message and manual. 
--- changes/tools/+how-to-type-format.feature.md | 11 ++ meson.build | 2 + tools/how-to-type.c | 129 ++++++++++++++++--- tools/xkbcli-how-to-type.1 | 62 +++++++++- 4 files changed, 182 insertions(+), 22 deletions(-) create mode 100644 changes/tools/+how-to-type-format.feature.md diff --git a/changes/tools/+how-to-type-format.feature.md b/changes/tools/+how-to-type-format.feature.md new file mode 100644 index 000000000..a967df8ee --- /dev/null +++ b/changes/tools/+how-to-type-format.feature.md @@ -0,0 +1,11 @@ +`xkbcli how-to-type`: added new input formats and their corresponding documentation. + +*Unicode code points* can be passed in the following formats: +- Literal character (requires UTF-8 character encoding of the terminal); +- Decimal number; +- Hexadecimal number: either `0xNNNN` or `U+NNNN`. + +*Keysyms* can be passed in the following formats: +- Decimal number; +- Hexadecimal number: `0xNNNN`; +- Name. diff --git a/meson.build b/meson.build index 58ae115e6..6f0d34fe0 100644 --- a/meson.build +++ b/meson.build @@ -518,6 +518,8 @@ if build_tools # Tool: how-to-type executable('xkbcli-how-to-type', 'tools/how-to-type.c', + 'src/utf8-decoding.c', + 'src/utf8-decoding.h', dependencies: tools_dep, install: true, install_dir: dir_libexec) diff --git a/tools/how-to-type.c b/tools/how-to-type.c index 72aea1bee..fcba32bbb 100644 --- a/tools/how-to-type.c +++ b/tools/how-to-type.c @@ -30,16 +30,103 @@ #include #include "xkbcommon/xkbcommon.h" +#include "src/utils.h" #include "src/keysym.h" +#include "src/utf8-decoding.h" #define ARRAY_SIZE(arr) ((sizeof(arr) / sizeof(*(arr)))) +/* + * Parse the positional argument as a Unicode code point. + * + * The argument is first decoded as a single UTF-8 encoded character. If that + * fails or does not consume the whole string, it is parsed with strtol: in + * base 16 only after a U+ prefix, otherwise in base 10 and then in base 16. + * + * Returns INVALID_UTF8_CODE_POINT if the argument has zero length or if no + * format yields a value in the range 0..0x10FFFF. + */ +static uint32_t +parse_char_or_codepoint(const char *raw) { + size_t raw_length = strlen_safe(raw); + size_t length = 0; + + if (!raw_length) + return INVALID_UTF8_CODE_POINT; + + /* Try to parse the parameter as a UTF-8 encoded single character */ + uint32_t codepoint = utf8_next_code_point(raw, raw_length, &length); + + /* If parsing failed or did not consume all the string, then try other 
formats */ + if (codepoint == INVALID_UTF8_CODE_POINT || + length == 0 || length != raw_length) { + char *endp; + long val; + int base = 10; + /* Detect the standard Unicode code point format: U+NNNN */ + if (raw_length >= 2 && raw[0] == 'U' && raw[1] == '+') { + base = 16; + raw += 2; + } + /* Use strtol with explicit bases instead of `0` in order to avoid + * unexpected parsing as octal. */ + for (; base <= 16; base += 6) { + errno = 0; + val = strtol(raw, &endp, base); + if (errno != 0 || !isempty(endp) || val < 0 || val > 0x10FFFF) { + val = -1; + } else { + break; + } + } + if (val < 0) { + fprintf(stderr, "ERROR: Failed to convert argument to Unicode code point\n"); + return INVALID_UTF8_CODE_POINT; + } + codepoint = (uint32_t) val; + } + return codepoint; +} + static void usage(const char *argv0, FILE *fp) { - fprintf(fp, "Usage: %s [--keysym] [--rules ] [--model ] " + fprintf(fp, "Usage: %s [--help] [--keysym] [--rules ] [--model ] " "[--layout ] [--variant ] [--options ]" - " \n", argv0); + " \n", argv0); + fprintf(fp, + "\n" + "Prints the key combinations (keycode + modifiers) in the keymap's " + "layouts which would produce the given Unicode codepoint or keysym.\n" + "\n" + " is either:\n" + "- a single character (requires a terminal which uses UTF-8 character " + "encoding);\n" + "- a Unicode code point, interpreted as hexadecimal if prefixed with " + "`0x` or `U+` else as decimal;\n" + "- a keysym if --keysym is used: either a numeric value (hexadecimal " + "if prefixed with 0x else decimal) or a keysym name.\n" + "\n" + "Options:\n" + " --help\n" + " Print this help and exit\n" + " --keysym\n" + " Treat the argument as a keysym, not a Unicode codepoint\n" + "XKB-specific options:\n" + " --rules \n" + " The XKB ruleset (default: '%s')\n" + " --model \n" + " The XKB model (default: '%s')\n" + " --layout \n" + " The XKB 
layout (default: '%s')\n" + " --variant \n" + " The XKB layout variant (default: '%s')\n" + " --options \n" + " The XKB options (default: '%s')\n" + "\n", + DEFAULT_XKB_RULES, DEFAULT_XKB_MODEL, DEFAULT_XKB_LAYOUT, + DEFAULT_XKB_VARIANT ? DEFAULT_XKB_VARIANT : "", + DEFAULT_XKB_OPTIONS ? DEFAULT_XKB_OPTIONS : ""); } int @@ -117,41 +204,44 @@ main(int argc, char *argv[]) } } if (argc - optind != 1) { - usage(argv[0], stderr); - exit(EXIT_INVALID_USAGE); + fprintf(stderr, "ERROR: expected exactly one positional parameter\n"); + goto parse_error; } if (keysym_mode) { + // Try to parse keysym name or hexadecimal value (0xNNNN) keysym = xkb_keysym_from_name(argv[optind], XKB_KEYSYM_NO_FLAGS); if (keysym == XKB_KEY_NoSymbol) { - fprintf(stderr, "Failed to convert argument to keysym\n"); - goto err; + // Try to parse numeric keysym in base 10, without prefix + errno = 0; /* strtol reports range errors only through errno */ + val = strtol(argv[optind], &endp, 10); + if (errno != 0 || !isempty(endp) || val <= 0 || val > XKB_KEYSYM_MAX) { + fprintf(stderr, "ERROR: Failed to convert argument to keysym\n"); + goto parse_error; + } + keysym = (uint32_t) val; } } else { - errno = 0; - val = strtol(argv[optind], &endp, 0); - if (errno != 0 || endp == argv[optind] || val < 0 || val > 0x10FFFF) { - usage(argv[0], stderr); - exit(EXIT_INVALID_USAGE); - } - codepoint = (uint32_t) val; + codepoint = parse_char_or_codepoint(argv[optind]); + if (codepoint == INVALID_UTF8_CODE_POINT) + goto parse_error; keysym = xkb_utf32_to_keysym(codepoint); if (keysym == XKB_KEY_NoSymbol) { - fprintf(stderr, "Failed to convert codepoint to keysym\n"); - goto err; + fprintf(stderr, "ERROR: Failed to convert codepoint to keysym\n"); + goto parse_error; } } ret = xkb_keysym_get_name(keysym, name, sizeof(name)); if (ret < 0 || (size_t) ret >= sizeof(name)) { - fprintf(stderr, "Failed to get name of keysym\n"); + fprintf(stderr, "ERROR: Failed to get name of keysym\n"); goto err; } ctx = xkb_context_new(XKB_CONTEXT_NO_FLAGS); if (!ctx) { - fprintf(stderr, "Failed to create XKB 
context\n"); + fprintf(stderr, "ERROR: Failed to create XKB context\n"); goto err; } @@ -165,7 +255,7 @@ main(int argc, char *argv[]) keymap = xkb_keymap_new_from_names(ctx, &names, XKB_KEYMAP_COMPILE_NO_FLAGS); if (!keymap) { - fprintf(stderr, "Failed to create XKB keymap\n"); + fprintf(stderr, "ERROR: Failed to create XKB keymap\n"); goto err; } @@ -237,4 +327,7 @@ main(int argc, char *argv[]) xkb_keymap_unref(keymap); xkb_context_unref(ctx); return err; +parse_error: + usage(argv[0], stderr); + exit(EXIT_INVALID_USAGE); } diff --git a/tools/xkbcli-how-to-type.1 b/tools/xkbcli-how-to-type.1 index be927d886..8243346bc 100644 --- a/tools/xkbcli-how-to-type.1 +++ b/tools/xkbcli-how-to-type.1 @@ -1,21 +1,75 @@ -.Dd June 4, 2024 +.Dd September 11, 2024 .Dt XKBCLI\-HOW\-TO\-TYPE 1 .Os . .Sh NAME .Nm "xkbcli\-how\-to\-type" -.Nd query how to type a given Unicode codepoint +.Nd query how to type a given Unicode codepoint or keysym . .Sh SYNOPSIS .Nm .Op options -.Ar codepoint/keysym +.Ar character/codepoint/keysym . .Sh DESCRIPTION .Nm prints the key combinations (keycode + modifiers) in the keymap's layouts which -would produce the given Unicode codepoint. +would produce the given Unicode codepoint or keysym. +. +.Pp +.Ar character/codepoint/keysym +is either: +. +.Bl -bullet -compact +.It +a single character (requires a terminal which uses UTF-8 character encoding); +.It +a Unicode code point, interpreted as hexadecimal if prefixed with +.Li 0x +or +.Li U+ +else as decimal; +. +.It +a keysym if +.Fl \-keysym +is used: either a \fInumeric\fP value (hexadecimal if prefixed with +.Li 0x +else decimal) or a keysym \fIname\fP. +.El +. +.Sh EXAMPLES +.Bl -tag -width Ds +.It Nm Fl \-layout Ar us 97 +.It Nm Fl \-layout Ar us 0x61 +.It Nm Fl \-layout Ar us U+0061 +.It Nm Fl \-layout Ar us a +Print the key combinations that produce the letter "a" +.Po +decimal code point: +.Ar 97 , +hexadecimal code point: +.Ar 61 +.Pc +in the default +.Ar us +layout. 
+.It Nm Fl \-layout Ar us Fl \-keysym Ar 97 +.It Nm Fl \-layout Ar us Fl \-keysym Ar 0x61 +.It Nm Fl \-layout Ar us Fl \-keysym Ar a +Print the key combinations that produce the keysym "a" +.Po +decimal code: +.Ar 97 , +hexadecimal code: +.Ar 61 +.Pc +in the default +.Ar us +layout. +.El . +.Sh OPTIONS .Bl -tag -width Ds .It Fl \-keysym Treat the argument as a keysym, not a Unicode codepoint