From d11a76349745576534fda87dd43d77d803a9bd86 Mon Sep 17 00:00:00 2001
From: Rot127 <unisono@quyllur.org>
Date: Wed, 13 Dec 2023 18:02:54 -0500
Subject: [PATCH] Replace OpenBSD regex library with PCRE2.

PCRE2 has way better performance than the OpenBSD
library (something around 20 times faster).

The following flags are enabled for every pattern:

- PCRE2_UTF
- PCRE2_MATCH_INVALID_UTF
- PCRE2_NO_UTF_CHECK

All the others are optional.

Changes made:

- Adds PCRE2 as subproject.
- Changes the API away from POSIX to PCRE2.
- Edits many regex patterns because:
 - ' ' is ignored in patterns when the EXTENDED flag is set; whitespace must now be matched explicitly with '\s'.
 - '.' doesn't match newlines by default.
- Changes the API so matches and their groups are bundled into PVectors.
- Moves the regex component to rz_util.
---
 .gitignore                                 |    1 +
 .reuse/dep5                                |    5 -
 binrz/rz-test/run.c                        |   17 +-
 binrz/rz-test/rz-test.c                    |    5 +-
 librz/asm/arch/hexagon/hexagon_arch.c      |    8 +-
 librz/asm/asm.c                            |   27 +-
 librz/asm/p/asm_bf.c                       |    6 +-
 librz/asm/p/asm_hexagon.c                  |   26 +-
 librz/cons/less.c                          |   23 +-
 librz/cons/pager.c                         |   64 +-
 librz/cons/pager_private.h                 |    9 +-
 librz/core/casm.c                          |    8 +-
 librz/core/cbin.c                          |    2 +-
 librz/core/cmd/cmd_debug.c                 |    3 +-
 librz/core/cmd/cmd_search.c                |    5 +-
 librz/core/core.c                          |   16 +-
 librz/include/meson.build                  |    2 +-
 librz/include/rz_regex.h                   |   77 -
 librz/include/rz_util.h                    |    2 +-
 librz/include/rz_util/rz_regex.h           |   82 +
 librz/magic/file.h                         |    2 +-
 librz/magic/softmagic.c                    |   82 +-
 librz/parse/filter.c                       |   10 +-
 librz/parse/p/parse_arm_pseudo.c           |   55 +-
 librz/parse/p/parse_mips_pseudo.c          |   54 +-
 librz/parse/p/parse_x86_pseudo.c           |   58 +-
 librz/reg/profile.c                        |    6 +-
 librz/search/regexp.c                      |   38 +-
 librz/util/list.c                          |    4 +
 librz/util/meson.build                     |    8 +-
 librz/util/print.c                         |    2 +-
 librz/util/regex.c                         |  411 +++++
 librz/util/regex/COPYRIGHT                 |   54 -
 librz/util/regex/README                    |    5 -
 librz/util/regex/cclass.h                  |   70 -
 librz/util/regex/cname.h                   |  139 --
 librz/util/regex/engine.c                  | 1076 ------------
 librz/util/regex/re_format.7               |  756 ---------
 librz/util/regex/regcomp.c                 | 1786 --------------------
 librz/util/regex/regerror.c                |  132 --
 librz/util/regex/regex.3                   |  667 --------
 librz/util/regex/regex2.h                  |  158 --
 librz/util/regex/regexec.c                 |  174 --
 librz/util/regex/test.c                    |   55 -
 librz/util/regex/utils.h                   |   62 -
 librz/util/str.c                           |   27 +-
 meson.build                                |   24 +
 meson_options.txt                          |    1 +
 subprojects/packagefiles/pcre2/meson.build |   85 +
 subprojects/pcre2.wrap                     |    8 +
 test/db/archos/darwin-arm64/dbg            |  128 +-
 test/db/archos/darwin-x64/dbg              |   84 +-
 test/db/archos/linux-x64/dbg_dmh           |   18 +-
 test/db/archos/linux-x64/dbg_oo            |    5 +-
 test/db/archos/linux-x64/dbg_step          |    4 +-
 test/db/archos/linux-x64/dbg_trace         |    4 +-
 test/db/archos/windows-x64/dbg_dts         |    2 +-
 test/db/cmd/cmd_http_post                  |    2 +-
 test/db/cmd/cmd_pd2                        |    4 +-
 test/db/formats/pdb                        |    2 +-
 test/db/formats/pyc                        |   38 +-
 test/unit/test_regex.c                     |  173 +-
 test/unit/test_str.c                       |    4 +-
 63 files changed, 1196 insertions(+), 5669 deletions(-)
 delete mode 100644 librz/include/rz_regex.h
 create mode 100644 librz/include/rz_util/rz_regex.h
 create mode 100644 librz/util/regex.c
 delete mode 100644 librz/util/regex/COPYRIGHT
 delete mode 100644 librz/util/regex/README
 delete mode 100644 librz/util/regex/cclass.h
 delete mode 100644 librz/util/regex/cname.h
 delete mode 100644 librz/util/regex/engine.c
 delete mode 100644 librz/util/regex/re_format.7
 delete mode 100644 librz/util/regex/regcomp.c
 delete mode 100644 librz/util/regex/regerror.c
 delete mode 100644 librz/util/regex/regex.3
 delete mode 100644 librz/util/regex/regex2.h
 delete mode 100644 librz/util/regex/regexec.c
 delete mode 100644 librz/util/regex/test.c
 delete mode 100644 librz/util/regex/utils.h
 create mode 100644 subprojects/packagefiles/pcre2/meson.build
 create mode 100644 subprojects/pcre2.wrap

diff --git a/.gitignore b/.gitignore
index f499c34b615..6ea09b4dad2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -117,6 +117,7 @@ peda-session-*
 .cache/
 test/.tmp/*
 subprojects/capstone-*/
+subprojects/pcre2/
 subprojects/libzip-*/
 subprojects/lz4-*/
 subprojects/packagecache/
diff --git a/.reuse/dep5 b/.reuse/dep5
index a472729220e..bcf65d0f3d8 100644
--- a/.reuse/dep5
+++ b/.reuse/dep5
@@ -188,11 +188,6 @@ Copyright: 1986-1995 Ian F. Darwin
            1995-present Christos Zoulas and others
 License: BSD-2-Clause
 
-Files: librz/util/regex/*
-Copyright: 1992, 1993, 1994 Henry Spencer
-           1992, 1993, 1994 The Regents of the University of California
-License: BSD-3-Clause
-
 Files: subprojects/rzheap/rz_jemalloc/*
 Copyright: 2002-present Jason Evans <jasone@canonware.com>
            2007-2012 Mozilla Foundation.
diff --git a/binrz/rz-test/run.c b/binrz/rz-test/run.c
index d27cfd26626..a48b10333c8 100644
--- a/binrz/rz-test/run.c
+++ b/binrz/rz-test/run.c
@@ -2,6 +2,8 @@
 // SPDX-License-Identifier: LGPL-3.0-only
 
 #include "rz_test.h"
+#include <rz_util/rz_str.h>
+#include <rz_util/rz_regex.h>
 #include <rz_cons.h>
 
 #if __WINDOWS__
@@ -193,11 +195,16 @@ RZ_API RzSubprocessOutput *rz_test_run_cmd_test(RzTestRunConfig *config, RzCmdTe
 
 RZ_API bool rz_test_cmp_cmd_output(const char *output, const char *expect, const char *regexp) {
 	if (regexp) {
-		RzList *matches = rz_regex_get_match_list(regexp, "e", output);
-		const char *match = rz_list_to_str(matches, '\0');
-		bool equal = (0 == strcmp(expect, match));
-		rz_list_free(matches);
-		RZ_FREE(match);
+		RzStrBuf *match_str = rz_regex_full_match_str(regexp, output, RZ_REGEX_ZERO_TERMINATED, RZ_REGEX_EXTENDED, RZ_REGEX_DEFAULT, "\n");
+		bool equal = false;
+		ut32 expect_len = strlen(expect);
+		if (expect_len > 0 && expect[expect_len - 1] == '\n') {
+			// Ignore newline
+			equal = (rz_str_cmp(expect, rz_strbuf_get(match_str), expect_len - 1) == 0);
+		} else {
+			equal = RZ_STR_EQ(expect, rz_strbuf_get(match_str));
+		}
+		rz_strbuf_free(match_str);
 		return equal;
 	}
 	return (0 == strcmp(expect, output));
diff --git a/binrz/rz-test/rz-test.c b/binrz/rz-test/rz-test.c
index 88caddbe769..ca3ba88c38e 100644
--- a/binrz/rz-test/rz-test.c
+++ b/binrz/rz-test/rz-test.c
@@ -743,9 +743,8 @@ static void print_diff(const char *actual, const char *expected, const char *reg
 	const char *output = actual;
 
 	if (regexp) {
-		RzList *matches = rz_regex_get_match_list(regexp, "e", actual);
-		output = rz_list_to_str(matches, '\0');
-		rz_list_free(matches);
+		RzStrBuf *match_str = rz_regex_full_match_str(regexp, actual, RZ_REGEX_ZERO_TERMINATED, RZ_REGEX_EXTENDED, RZ_REGEX_DEFAULT, "\n");
+		output = rz_strbuf_drain(match_str);
 	}
 
 	d = rz_diff_lines_new(expected, output, NULL);
diff --git a/librz/asm/arch/hexagon/hexagon_arch.c b/librz/asm/arch/hexagon/hexagon_arch.c
index ece2396cb06..0d0dbf28c84 100644
--- a/librz/asm/arch/hexagon/hexagon_arch.c
+++ b/librz/asm/arch/hexagon/hexagon_arch.c
@@ -888,13 +888,17 @@ RZ_API void hexagon_reverse_opcode(const RzAsm *rz_asm, HexReversedOpcode *rz_re
 		memcpy(rz_reverse->ana_op, &hic->ana_op, sizeof(RzAnalysisOp));
 		rz_strbuf_set(&rz_reverse->asm_op->buf_asm, hic->text);
 		rz_reverse->asm_op->asm_toks = rz_asm_tokenize_asm_regex(&rz_reverse->asm_op->buf_asm, state->token_patterns);
-		rz_reverse->asm_op->asm_toks->op_type = hic->ana_op.type;
+		if (rz_reverse->asm_op->asm_toks) {
+			rz_reverse->asm_op->asm_toks->op_type = hic->ana_op.type;
+		}
 		break;
 	case HEXAGON_DISAS:
 		memcpy(rz_reverse->asm_op, &hic->asm_op, sizeof(RzAsmOp));
 		rz_strbuf_set(&rz_reverse->asm_op->buf_asm, hic->text);
 		rz_reverse->asm_op->asm_toks = rz_asm_tokenize_asm_regex(&rz_reverse->asm_op->buf_asm, state->token_patterns);
-		rz_reverse->asm_op->asm_toks->op_type = hic->ana_op.type;
+		if (rz_reverse->asm_op->asm_toks) {
+			rz_reverse->asm_op->asm_toks->op_type = hic->ana_op.type;
+		}
 		break;
 	case HEXAGON_ANALYSIS:
 		memcpy(rz_reverse->ana_op, &hic->ana_op, sizeof(RzAnalysisOp));
diff --git a/librz/asm/asm.c b/librz/asm/asm.c
index e7931366f37..956abfc5a96 100644
--- a/librz/asm/asm.c
+++ b/librz/asm/asm.c
@@ -5,7 +5,7 @@
 #include "rz_util/rz_print.h"
 #include <rz_vector.h>
 #include <rz_util/rz_strbuf.h>
-#include <rz_regex.h>
+#include <rz_util/rz_regex.h>
 #include <rz_util/rz_assert.h>
 #include <rz_list.h>
 #include <stdio.h>
@@ -1545,7 +1545,7 @@ RZ_API void rz_asm_compile_token_patterns(RZ_INOUT RzPVector /*<RzAsmTokenPatter
 	rz_pvector_foreach (patterns, it) {
 		RzAsmTokenPattern *pat = *it;
 		if (!pat->regex) {
-			pat->regex = rz_regex_new(pat->pattern, "e");
+			pat->regex = rz_regex_new(pat->pattern, RZ_REGEX_EXTENDED, 0);
 			if (!pat->regex) {
 				RZ_LOG_WARN("Did not compile regex pattern %s.\n", pat->pattern);
 				rz_warn_if_reached();
@@ -1584,32 +1584,31 @@ RZ_API RZ_OWN RzAsmTokenString *rz_asm_tokenize_asm_regex(RZ_BORROW RzStrBuf *as
 			}
 		}
 
-		/// Start pattern search from the beginning
-		size_t asm_str_off = 0;
-
 		// Search for token pattern.
-		RzRegexMatch match[1];
-		while (rz_regex_exec(pattern->regex, asm_str + asm_str_off, 1, match, 0) == 0) {
-			st64 match_start = match[0].rm_so; // Token start
-			st64 match_end = match[0].rm_eo; // Token end
-			st64 len = match_end - match_start; // Length of token
-			st64 tok_offset = asm_str_off + match_start; // Token offset in str
+		RzPVector *match_sets = rz_regex_match_all(pattern->regex, asm_str, RZ_REGEX_ZERO_TERMINATED, 0, RZ_REGEX_DEFAULT);
+		void **grouped_match;
+		rz_pvector_foreach (match_sets, grouped_match) {
+			if (rz_pvector_empty(*grouped_match)) {
+				continue;
+			}
+			RzRegexMatch *match = rz_pvector_at(*grouped_match, 0);
+			st64 match_start = match->start; // Token start
+			st64 len = match->len; // Length of token
+			st64 tok_offset = match_start; // Token offset in str
 			if (overlaps_with_token(toks->tokens, tok_offset, tok_offset + len - 1)) {
 				// If this is true a token with higher priority was matched before.
-				asm_str_off = tok_offset + len;
 				continue;
 			}
 
 			// New token found, add it.
 			if (!is_num(asm_str + tok_offset)) {
 				add_token(toks, tok_offset, len, pattern->type, 0);
-				asm_str_off = tok_offset + len;
 				continue;
 			}
 			ut64 number = strtoull(asm_str + tok_offset, NULL, 0);
 			add_token(toks, tok_offset, len, pattern->type, number);
-			asm_str_off = tok_offset + len;
 		}
+		rz_pvector_free(match_sets);
 	}
 
 	rz_vector_sort(toks->tokens, (RzVectorComparator)cmp_tokens, false, NULL);
diff --git a/librz/asm/p/asm_bf.c b/librz/asm/p/asm_bf.c
index 3e271e73004..7a784a185e7 100644
--- a/librz/asm/p/asm_bf.c
+++ b/librz/asm/p/asm_bf.c
@@ -30,14 +30,14 @@ static RZ_OWN RzPVector /*<RzAsmTokenPattern *>*/ *get_token_patterns(RzAsm *a)
 	pat = RZ_NEW0(RzAsmTokenPattern);
 	pat->type = RZ_ASM_TOKEN_REGISTER;
 	pat->pattern = strdup(
-		"(ptr)");
+		"ptr");
 	rz_pvector_push(pvec, pat);
 
 	// reference pattern
 	pat = RZ_NEW0(RzAsmTokenPattern);
 	pat->type = RZ_ASM_TOKEN_OPERATOR;
 	pat->pattern = strdup(
-		"(\\[)|(\\])" // Matches a single bracket
+		"\\[|\\]" // Matches a single bracket
 	);
 	rz_pvector_push(pvec, pat);
 
@@ -45,7 +45,7 @@ static RZ_OWN RzPVector /*<RzAsmTokenPattern *>*/ *get_token_patterns(RzAsm *a)
 	pat = RZ_NEW0(RzAsmTokenPattern);
 	pat->type = RZ_ASM_TOKEN_SEPARATOR;
 	pat->pattern = strdup(
-		"([[:blank:]]+)");
+		"\\s+");
 	rz_pvector_push(pvec, pat);
 
 	return pvec;
diff --git a/librz/asm/p/asm_hexagon.c b/librz/asm/p/asm_hexagon.c
index e0506c4117f..e8db9add945 100644
--- a/librz/asm/p/asm_hexagon.c
+++ b/librz/asm/p/asm_hexagon.c
@@ -30,15 +30,15 @@ static RZ_OWN RzPVector /*<RzAsmTokenPattern *>*/ *get_token_patterns(HexState *
 	RzAsmTokenPattern *pat = RZ_NEW0(RzAsmTokenPattern);
 	pat->type = RZ_ASM_TOKEN_META;
 	pat->pattern = strdup(
-		"(^[\\[\\?\\/\\|\\\\\\{])|(┌)|(│)|(└)|" // Packet prefix
-		"((∎)|[<\\}])([ :])(endloop[01]{1,2})" // Endloop markers
+		"^[\\[\\?\\/\\|\\\\\\{┌│└]|" // Packet prefix
+		"(∎|[<\\}])[\\s:]endloop[01]{1,2}" // Endloop markers
 	);
 	rz_pvector_push(pvec, pat);
 
 	pat = RZ_NEW0(RzAsmTokenPattern);
 	pat->type = RZ_ASM_TOKEN_META;
 	pat->pattern = strdup(
-		"(#{1,2})|(\\}$)|" // Immediate prefix, Closing packet bracket
+		"\\#{1,2}|\\}$|" // Immediate prefix, Closing packet bracket
 		"\\.new|:n?t|:raw|<err>" // .new and jump hints
 	);
 	rz_pvector_push(pvec, pat);
@@ -46,7 +46,7 @@ static RZ_OWN RzPVector /*<RzAsmTokenPattern *>*/ *get_token_patterns(HexState *
 	pat = RZ_NEW0(RzAsmTokenPattern);
 	pat->type = RZ_ASM_TOKEN_REGISTER;
 	pat->pattern = strdup(
-		"([CNPRMQVO][[:digit:]]{1,2}(:[[:digit:]]{1,2})?(in)?)" // Registers and double registers
+		"[CNPRMQVO]\\d{1,2}(:\\d{1,2})?(in)?" // Registers and double registers
 	);
 	rz_pvector_push(pvec, pat);
 
@@ -60,51 +60,51 @@ static RZ_OWN RzPVector /*<RzAsmTokenPattern *>*/ *get_token_patterns(HexState *
 	pat = RZ_NEW0(RzAsmTokenPattern);
 	pat->type = RZ_ASM_TOKEN_NUMBER;
 	pat->pattern = strdup(
-		"(0x[[:digit:]abcdef]+)" // Hexadecimal numbers
+		"0x(\\d|[abcdef])+" // Hexadecimal numbers
 	);
 	rz_pvector_push(pvec, pat);
 
 	pat = RZ_NEW0(RzAsmTokenPattern);
 	pat->type = RZ_ASM_TOKEN_MNEMONIC;
 	pat->pattern = strdup(
-		"([[:alpha:]]+[[:digit:]]+[[:alpha:]]*)" // Mnemonics with a decimal number in the name.
+		"[a-zA-Z]+\\d+[a-zA-Z]*" // Mnemonics with a decimal number in the name.
 	);
 	rz_pvector_push(pvec, pat);
 
 	pat = RZ_NEW0(RzAsmTokenPattern);
 	pat->type = RZ_ASM_TOKEN_NUMBER;
 	pat->pattern = strdup(
-		"([[:digit:]]+)" // Decimal numbers
+		"\\d+" // Decimal numbers
 	);
 	rz_pvector_push(pvec, pat);
 
 	pat = RZ_NEW0(RzAsmTokenPattern);
 	pat->type = RZ_ASM_TOKEN_SEPARATOR;
 	pat->pattern = strdup(
-		"([[:blank:]]+)|" // Spaces and tabs
-		"([,;\\.\\(\\)\\{\\}:])" // Brackets and others
+		"\\s+|" // Spaces and tabs
+		"[,;\\.\\(\\)\\{\\}:]" // Brackets and others
 	);
 	rz_pvector_push(pvec, pat);
 
 	pat = RZ_NEW0(RzAsmTokenPattern);
 	pat->type = RZ_ASM_TOKEN_OPERATOR;
 	pat->pattern = strdup(
-		"(\\+)|(=)|(!)|(-)" // +,-,=,],[, ! (not the packet prefix)
+		"[\\+=!-]" // +, -, =, ! (not the packet prefix)
 	);
 	rz_pvector_push(pvec, pat);
 
 	pat = RZ_NEW0(RzAsmTokenPattern);
 	pat->type = RZ_ASM_TOKEN_OPERATOR;
 	pat->pattern = strdup(
-		"(\\])|(\\[|<{1,2}|>{1,2})" // +,-,=,],[, ! (not the packet prefix)
+		"\\]|\\[|<{1,2}|>{1,2}" // ], [, <, <<, >, >>
 	);
 	rz_pvector_push(pvec, pat);
 
 	pat = RZ_NEW0(RzAsmTokenPattern);
 	pat->type = RZ_ASM_TOKEN_MNEMONIC;
 	pat->pattern = strdup(
-		"([[:alnum:]]+)|" // Alphanumeric mnemonics
-		"([[:alnum:]]+_[[:alnum:]]+)" // Menmonics with "_" e.g dealloc_return
+		"\\w+_\\w+|" // Mnemonics with "_" e.g dealloc_return
+		"\\w+" // Alphanumeric mnemonics
 	);
 	rz_pvector_push(pvec, pat);
 
diff --git a/librz/cons/less.c b/librz/cons/less.c
index 1127d8def5f..de89fbd5912 100644
--- a/librz/cons/less.c
+++ b/librz/cons/less.c
@@ -3,9 +3,10 @@
 // SPDX-License-Identifier: LGPL-3.0-only
 
 #include <rz_cons.h>
-#include <rz_regex.h>
+#include <rz_util/rz_regex.h>
 #include <rz_util.h>
 #include "pager_private.h"
+#include "rz_vector.h"
 
 #define I(x) rz_cons_singleton()->x
 
@@ -31,7 +32,7 @@ RZ_API int rz_cons_less_str(const char *str, const char *exitkeys) {
 	RzRegex *rx = NULL;
 	int w, h, ch, to, ui = 1, from = 0, i;
 	const char *sreg;
-	RzList **mla;
+	RzPVector **mla;
 
 	// rcons kills str after flushing the buffer, so we must keep a copy
 	char *ostr = strdup(str);
@@ -47,7 +48,7 @@ RZ_API int rz_cons_less_str(const char *str, const char *exitkeys) {
 	if (lines_count < 1) {
 		mla = NULL;
 	} else {
-		mla = calloc(lines_count, sizeof(RzList *));
+		mla = calloc(lines_count, sizeof(RzPVector *));
 		if (!mla) {
 			free(p);
 			free(ostr);
@@ -55,9 +56,6 @@ RZ_API int rz_cons_less_str(const char *str, const char *exitkeys) {
 			return 0;
 		}
 	}
-	for (i = 0; i < lines_count; i++) {
-		mla[i] = rz_list_new();
-	}
 	rz_cons_set_raw(true);
 	rz_cons_show_cursor(false);
 	rz_cons_reset();
@@ -75,7 +73,7 @@ RZ_API int rz_cons_less_str(const char *str, const char *exitkeys) {
 		ch = rz_cons_readchar();
 		if (exitkeys && strchr(exitkeys, ch)) {
 			for (i = 0; i < lines_count; i++) {
-				rz_list_free(mla[i]);
+				rz_pvector_free(mla[i]);
 			}
 			free(p);
 			free(mla);
@@ -129,7 +127,7 @@ RZ_API int rz_cons_less_str(const char *str, const char *exitkeys) {
 				if (rx) {
 					rz_regex_free(rx);
 				}
-				rx = rz_regex_new(sreg, "");
+				rx = rz_regex_new(sreg, RZ_REGEX_EXTENDED | RZ_REGEX_MULTILINE, 0);
 			} else { /* we got an empty string */
 				from = pager_next_match(from, mla, lines_count);
 				break;
@@ -138,9 +136,12 @@ RZ_API int rz_cons_less_str(const char *str, const char *exitkeys) {
 				break;
 			}
 			/* find all occurrences */
-			if (pager_all_matches(p, rx, mla, lines, lines_count)) {
-				from = pager_next_match(from, mla, lines_count);
+			RzPVector *matches = rz_regex_match_all_not_grouped(rx, str, RZ_REGEX_ZERO_TERMINATED, 0, RZ_REGEX_DEFAULT);
+			bool found = !rz_pvector_empty(matches);
+			rz_pvector_free(matches); /* only used for the emptiness check; release it on every path */
+			if (found) {
+				from = pager_next_match(from, mla, lines_count);
 			}
 			break;
 		case 'n': /* next match */
 			/* search already performed */
@@ -157,7 +158,7 @@ RZ_API int rz_cons_less_str(const char *str, const char *exitkeys) {
 		}
 	}
 	for (i = 0; i < lines_count; i++) {
-		rz_list_free(mla[i]);
+		rz_pvector_free(mla[i]);
 	}
 	free(mla);
 	rz_regex_free(rx);
diff --git a/librz/cons/pager.c b/librz/cons/pager.c
index 545cc98ea30..c6c09c2384e 100644
--- a/librz/cons/pager.c
+++ b/librz/cons/pager.c
@@ -1,16 +1,15 @@
 // SPDX-FileCopyrightText: 2019 pancake <pancake@nopcode.org>
 // SPDX-License-Identifier: LGPL-3.0-only
 
-#include <rz_regex.h>
+#include <rz_util/rz_regex.h>
 #include <rz_util.h>
 #include <rz_cons.h>
 #include "pager_private.h"
+#include "rz_vector.h"
 
-RZ_IPI void pager_color_line(const char *line, RzStrpool *p, RzList /*<RzRegexMatch *>*/ *ml) {
+RZ_IPI void pager_color_line(const char *line, RzStrpool *p, RzPVector /*<RzRegexMatch *>*/ *ml) {
 	int m_len, offset = 0;
 	char *m_addr;
-	RzListIter *it;
-	RzRegexMatch *m;
 	char *inv[2] = {
 		RZ_CONS_INVERT(true, true),
 		RZ_CONS_INVERT(false, true)
@@ -20,15 +19,17 @@ RZ_IPI void pager_color_line(const char *line, RzStrpool *p, RzList /*<RzRegexMa
 		strlen(inv[1])
 	};
 	rz_strpool_empty(p);
-	rz_list_foreach (ml, it, m) {
+	void **it;
+	rz_pvector_foreach (ml, it) {
+		RzRegexMatch *m = *it;
 		/* highlight a match */
-		rz_strpool_memcat(p, line + offset, m->rm_so - offset);
+		rz_strpool_memcat(p, line + offset, m->start - offset);
 		rz_strpool_memcat(p, inv[0], linv[0]);
-		m_len = m->rm_eo - m->rm_so;
+		m_len = m->len;
 		if (m_len < 0) {
 			m_len = 0;
 		}
-		m_addr = rz_str_ndup(line + m->rm_so, m_len);
+		m_addr = rz_str_ndup(line + m->start, m_len);
 		if (m_addr) {
 			/* in case there's a CSI in the middle of this match*/
 			m_len = rz_str_ansi_filter(m_addr, NULL, NULL, m_len);
@@ -37,7 +38,7 @@ RZ_IPI void pager_color_line(const char *line, RzStrpool *p, RzList /*<RzRegexMa
 			}
 			rz_strpool_memcat(p, m_addr, m_len);
 			rz_strpool_memcat(p, inv[1], linv[1]);
-			offset = m->rm_eo;
+			offset = m->start + m->len;
 			free(m_addr);
 		}
 	}
@@ -45,7 +46,7 @@ RZ_IPI void pager_color_line(const char *line, RzStrpool *p, RzList /*<RzRegexMa
 	rz_strpool_append(p, line + offset);
 }
 
-RZ_IPI void pager_printpage(const char *line, int *index, RzList /*<RzRegexMatch *>*/ **mla, int from, int to, int w) {
+RZ_IPI void pager_printpage(const char *line, int *index, RzPVector /*<RzRegexMatch *>*/ **mla, int from, int to, int w) {
 	int i;
 
 	rz_cons_clear00();
@@ -71,68 +72,33 @@ RZ_IPI void pager_printpage(const char *line, int *index, RzList /*<RzRegexMatch
 	rz_cons_flush();
 }
 
-RZ_IPI int pager_next_match(int from, RzList /*<RzRegexMatch *>*/ **mla, int lcount) {
+RZ_IPI int pager_next_match(int from, RzPVector /*<RzRegexMatch *>*/ **mla, int lcount) {
 	int l;
 	if (from > lcount - 2) {
 		return from;
 	}
 	for (l = from + 1; l < lcount; l++) {
 		/* if there's at least one match on the line */
-		if (rz_list_first(mla[l])) {
+		if (!rz_pvector_empty(mla[l])) {
 			return l;
 		}
 	}
 	return from;
 }
 
-RZ_IPI int pager_prev_match(int from, RzList /*<RzRegexMatch *>*/ **mla) {
+RZ_IPI int pager_prev_match(int from, RzPVector /*<RzRegexMatch *>*/ **mla) {
 	int l;
 	if (from < 1) {
 		return from;
 	}
 	for (l = from - 1; l > 0; l--) {
-		if (rz_list_first(mla[l])) {
+		if (!rz_pvector_empty(mla[l])) {
 			return l;
 		}
 	}
 	return from;
 }
 
-RZ_IPI bool pager_all_matches(const char *s, RzRegex *rx, RzList /*<RzRegexMatch *>*/ **mla, int *lines, int lcount) {
-	bool res = false;
-	RzRegexMatch m = { 0 };
-	int l, slen;
-	for (l = 0; l < lcount; l++) {
-		m.rm_so = 0;
-		const char *loff = s + lines[l]; /* current line offset */
-		char *clean = strdup(loff);
-		if (!clean) {
-			return false;
-		}
-		int *cpos = NULL;
-		int ncpos = rz_str_ansi_filter(clean, NULL, &cpos, -1);
-		m.rm_eo = slen = strlen(clean);
-		rz_list_purge(mla[l]);
-		while (!rz_regex_exec(rx, clean, 1, &m, RZ_REGEX_STARTEND)) {
-			if (!cpos || m.rm_so >= ncpos) {
-				break;
-			}
-			RzRegexMatch *ms = RZ_NEW0(RzRegexMatch);
-			if (ms && cpos) {
-				ms->rm_so = cpos[m.rm_so];
-				ms->rm_eo = cpos[m.rm_eo];
-				rz_list_append(mla[l], ms);
-			}
-			m.rm_so = m.rm_eo;
-			m.rm_eo = slen;
-			res = true;
-		}
-		free(cpos);
-		free(clean);
-	}
-	return res;
-}
-
 RZ_IPI int *pager_splitlines(char *s, int *lines_count) {
 	int lines_size = 128;
 	int *lines = NULL;
diff --git a/librz/cons/pager_private.h b/librz/cons/pager_private.h
index 5c2c8b79e36..749735eda1d 100644
--- a/librz/cons/pager_private.h
+++ b/librz/cons/pager_private.h
@@ -4,11 +4,10 @@
 #ifndef PAGER_PRIVATE_H
 #define PAGER_PRIVATE_H
 
-RZ_IPI void pager_color_line(const char *line, RzStrpool *p, RzList /*<RzRegexMatch *>*/ *ml);
-RZ_IPI void pager_printpage(const char *line, int *index, RzList /*<RzRegexMatch *>*/ **mla, int from, int to, int w);
-RZ_IPI int pager_next_match(int from, RzList /*<RzRegexMatch *>*/ **mla, int lcount);
-RZ_IPI int pager_prev_match(int from, RzList /*<RzRegexMatch *>*/ **mla);
-RZ_IPI bool pager_all_matches(const char *s, RzRegex *rx, RzList /*<RzRegexMatch *>*/ **mla, int *lines, int lcount);
+RZ_IPI void pager_color_line(const char *line, RzStrpool *p, RzPVector /*<RzRegexMatch *>*/ *ml);
+RZ_IPI void pager_printpage(const char *line, int *index, RzPVector /*<RzRegexMatch *>*/ **mla, int from, int to, int w);
+RZ_IPI int pager_next_match(int from, RzPVector /*<RzRegexMatch *>*/ **mla, int lcount);
+RZ_IPI int pager_prev_match(int from, RzPVector /*<RzRegexMatch *>*/ **mla);
 RZ_IPI int *pager_splitlines(char *s, int *lines_count);
 
 #endif
diff --git a/librz/core/casm.c b/librz/core/casm.c
index a3d3a62335c..405c001f5ae 100644
--- a/librz/core/casm.c
+++ b/librz/core/casm.c
@@ -2,6 +2,8 @@
 // SPDX-FileCopyrightText: 2009-2019 pancake <pancake@nopcode.org>
 // SPDX-License-Identifier: LGPL-3.0-only
 
+#include <rz_util/rz_regex.h>
+#include <rz_vector.h>
 #include <rz_types.h>
 #include <rz_core.h>
 #include <rz_asm.h>
@@ -342,9 +344,11 @@ RZ_API RzList /*<RzCoreAsmHit *>*/ *rz_core_asm_strsearch(RzCore *core, const ch
 				} else if (!regexp) {
 					matches = strstr(opst, tokens[matchcount]) != NULL;
 				} else {
-					rx = rz_regex_new(tokens[matchcount], "es");
-					matches = rz_regex_exec(rx, opst, 0, 0, 0) == 0;
+					rx = rz_regex_new(tokens[matchcount], RZ_REGEX_EXTENDED, 0);
+					RzPVector *tmp_m = rz_regex_match_first(rx, opst, RZ_REGEX_ZERO_TERMINATED, 0, RZ_REGEX_DEFAULT);
+					matches = (tmp_m && !rz_pvector_empty(tmp_m)) ? 1 : 0; // check for NULL before inspecting the vector
 					rz_regex_free(rx);
+					rz_pvector_free(tmp_m);
 				}
 			}
 			if (align && align > 1) {
diff --git a/librz/core/cbin.c b/librz/core/cbin.c
index 39cbc63f39d..87c5314c934 100644
--- a/librz/core/cbin.c
+++ b/librz/core/cbin.c
@@ -3524,7 +3524,7 @@ static void classdump_objc(RzBinClass *c) {
 	RzBinClassField *f;
 	RzBinSymbol *sym;
 	rz_list_foreach (c->fields, iter2, f) {
-		if (f->name && rz_regex_match("ivar", "e", f->name)) {
+		if (f->name && strstr(f->name, "ivar")) { // haystack first: keep fields whose name contains "ivar"
 			rz_cons_printf("  %s %s\n", f->type, f->name);
 		}
 	}
diff --git a/librz/core/cmd/cmd_debug.c b/librz/core/cmd/cmd_debug.c
index 8fc8c29a840..ba970c79f64 100644
--- a/librz/core/cmd/cmd_debug.c
+++ b/librz/core/cmd/cmd_debug.c
@@ -1,6 +1,7 @@
 // SPDX-FileCopyrightText: 2009-2020 pancake <pancake@nopcode.org>
 // SPDX-License-Identifier: LGPL-3.0-only
 
+#include <rz_util/rz_regex.h>
 #include <rz_core.h>
 #include <rz_debug.h>
 #include <sdb.h>
@@ -315,7 +316,7 @@ static bool step_until_inst(RzCore *core, const char *instr, bool regex) {
 		if (ret > 0) {
 			const char *buf_asm = rz_asm_op_get_asm(&asmop);
 			if (regex) {
-				if (rz_regex_match(instr, "e", buf_asm)) {
+				if (rz_regex_contains(instr, buf_asm, RZ_REGEX_ZERO_TERMINATED, RZ_REGEX_EXTENDED, RZ_REGEX_DEFAULT)) {
 					RZ_LOG_ERROR("core: esil: stop.\n");
 					break;
 				}
diff --git a/librz/core/cmd/cmd_search.c b/librz/core/cmd/cmd_search.c
index 37488355ff5..47cb9dc2e47 100644
--- a/librz/core/cmd/cmd_search.c
+++ b/librz/core/cmd/cmd_search.c
@@ -6,6 +6,7 @@
 #include <rz_core.h>
 #include <rz_io.h>
 #include <rz_list.h>
+#include <rz_util/rz_regex.h>
 #include <rz_types_base.h>
 #include "../core_private.h"
 
@@ -1080,8 +1081,8 @@ static RzList /*<RzCoreAsmHit *>*/ *construct_rop_gadget(RzCore *core, ut64 addr
 		idx += opsz;
 		addr += opsz;
 		if (rx) {
-			grep_find = !rz_regex_match(rx, "e", opst);
-			search_hit = (end && grep && (grep_find < 1));
+			grep_find = rz_regex_contains(rx, opst, RZ_REGEX_ZERO_TERMINATED, RZ_REGEX_EXTENDED, RZ_REGEX_DEFAULT);
+			search_hit = (end && grep && grep_find);
 		} else {
 			search_hit = (end && grep && strstr(opst, grep_str));
 		}
diff --git a/librz/core/core.c b/librz/core/core.c
index 9bca0ef3d2a..b88a881b59d 100644
--- a/librz/core/core.c
+++ b/librz/core/core.c
@@ -1,6 +1,8 @@
 // SPDX-FileCopyrightText: 2009-2020 pancake <pancake@nopcode.org>
 // SPDX-License-Identifier: LGPL-3.0-only
 
+#include <rz_util/rz_regex.h>
+#include <rz_vector.h>
 #include <rz_core.h>
 #include <rz_socket.h>
 #include <rz_cmp.h>
@@ -1377,17 +1379,17 @@ static void autocomplete_theme(RzCore *core, RzLineCompletion *completion, const
 
 static bool find_e_opts(RzCore *core, RzLineCompletion *completion, RzLineBuffer *buf) {
 	const char *pattern = "e (.*)=";
-	RzRegex *rx = rz_regex_new(pattern, "e");
-	const size_t nmatch = 2;
-	RzRegexMatch pmatch[2] = { 0 };
+	RzRegex *rx = rz_regex_new(pattern, RZ_REGEX_EXTENDED, 0);
 	bool ret = false;
 
-	if (rz_regex_exec(rx, buf->data, nmatch, pmatch, 1)) {
+	RzPVector *matches = rz_regex_match_all_not_grouped(rx, buf->data, buf->length, 0, RZ_REGEX_DEFAULT);
+	if (!matches || rz_pvector_empty(matches) || rz_pvector_len(matches) < 2) {
 		goto out;
 	}
 	int i;
 	char *str = NULL, *sp;
-	for (i = pmatch[1].rm_so; i < pmatch[1].rm_eo; i++) {
+	RzRegexMatch *m1 = rz_pvector_at(matches, 1);
+	for (i = m1->start; i < m1->start + m1->len; i++) {
 		str = rz_str_appendch(str, buf->data[i]);
 	}
 	if (!str) {
@@ -1403,7 +1405,8 @@ static bool find_e_opts(RzCore *core, RzLineCompletion *completion, RzLineBuffer
 		*sp = ' ';
 	}
 	if (!node) {
-		return false;
+		ret = false;
+		goto out;
 	}
 	RzListIter *iter;
 	char *option;
@@ -1420,6 +1423,7 @@ static bool find_e_opts(RzCore *core, RzLineCompletion *completion, RzLineBuffer
 
 out:
 	rz_regex_free(rx);
+	rz_pvector_free(matches);
 	return ret;
 }
 
diff --git a/librz/include/meson.build b/librz/include/meson.build
index e3be2161099..5d18f64b548 100644
--- a/librz/include/meson.build
+++ b/librz/include/meson.build
@@ -38,7 +38,6 @@ include_files = [
   'rz_platform.h',
   'rz_project.h',
   'rz_reg.h',
-  'rz_regex.h',
   'rz_search.h',
   'rz_sign.h',
   'rz_skiplist.h',
@@ -96,6 +95,7 @@ rz_util_files = [
   'rz_util/rz_punycode.h',
   'rz_util/rz_range.h',
   'rz_util/rz_rbtree.h',
+  'rz_util/rz_regex.h',
   'rz_util/rz_serialize.h',
   'rz_util/rz_signal.h',
   'rz_util/rz_spaces.h',
diff --git a/librz/include/rz_regex.h b/librz/include/rz_regex.h
deleted file mode 100644
index 0b56cec8f2a..00000000000
--- a/librz/include/rz_regex.h
+++ /dev/null
@@ -1,77 +0,0 @@
-#ifndef RZ_REGEX_H
-#define RZ_REGEX_H
-
-#include <rz_types.h>
-#include <rz_list.h>
-#include <sys/types.h>
-
-typedef struct rz_regex_t {
-	int re_magic;
-	size_t re_nsub; /* number of parenthesized subexpressions */
-	const char *re_endp; /* end pointer for RZ_REGEX_PEND */
-	struct re_guts *re_g; /* none of your business :-) */
-	int re_flags;
-} RzRegex;
-
-typedef struct rz_regmatch_t {
-	st64 rm_so; /* start of match */
-	st64 rm_eo; /* end of match */
-} RzRegexMatch;
-
-/* regcomp() flags */
-#define RZ_REGEX_BASIC    0000
-#define RZ_REGEX_EXTENDED 0001
-#define RZ_REGEX_ICASE    0002
-#define RZ_REGEX_NOSUB    0004
-#define RZ_REGEX_NEWLINE  0010
-#define RZ_REGEX_NOSPEC   0020
-#define RZ_REGEX_PEND     0040
-#define RZ_REGEX_DUMP     0200
-
-/* regerror() flags */
-#define RZ_REGEX_ENOSYS   (-1) /* Reserved */
-#define RZ_REGEX_NOMATCH  1
-#define RZ_REGEX_BADPAT   2
-#define RZ_REGEX_ECOLLATE 3
-#define RZ_REGEX_ECTYPE   4
-#define RZ_REGEX_EESCAPE  5
-#define RZ_REGEX_ESUBREG  6
-#define RZ_REGEX_EBRACK   7
-#define RZ_REGEX_EPAREN   8
-#define RZ_REGEX_EBRACE   9
-#define RZ_REGEX_BADBR    10
-#define RZ_REGEX_ERANGE   11
-#define RZ_REGEX_ESPACE   12
-#define RZ_REGEX_BADRPT   13
-#define RZ_REGEX_EMPTY    14
-#define RZ_REGEX_ASSERT   15
-#define RZ_REGEX_INVARG   16
-#define RZ_REGEX_ILLSEQ   17
-#define RZ_REGEX_ATOI     255 /* convert name to number (!) */
-#define RZ_REGEX_ITOA     0400 /* convert number to name (!) */
-
-/* regexec() flags */
-#define RZ_REGEX_NOTBOL   00001
-#define RZ_REGEX_NOTEOL   00002
-#define RZ_REGEX_STARTEND 00004
-#define RZ_REGEX_TRACE    00400 /* tracing of execution */
-#define RZ_REGEX_LARGE    01000 /* force large representation */
-#define RZ_REGEX_BACKR    02000 /* force use of backref code */
-
-RZ_API RzRegex *rz_regex_new(const char *pattern, const char *cflags);
-RZ_API int rz_regex_match(const char *pattern, const char *flags, const char *text);
-RZ_API char *rz_regex_match_extract(RZ_NONNULL const char *str, RZ_NONNULL RzRegexMatch *match);
-RZ_API RzList /*<char *>*/ *rz_regex_get_match_list(const char *pattern, const char *flags, const char *text);
-RZ_API int rz_regex_flags(const char *flags);
-RZ_API int rz_regex_comp(RzRegex *, const char *, int);
-RZ_API size_t rz_regex_error(int, const RzRegex *, char *, size_t);
-/*
- * gcc under c99 mode won't compile "[]" by itself.  As a workaround,
- * a dummy argument name is added.
- */
-RZ_API bool rz_regex_check(const RzRegex *rr, const char *str);
-RZ_API int rz_regex_exec(const RzRegex *preg, const char *string, size_t nmatch, RzRegexMatch __pmatch[], int eflags);
-RZ_API void rz_regex_free(RzRegex *);
-RZ_API void rz_regex_fini(RzRegex *);
-
-#endif /* !_REGEX_H_ */
diff --git a/librz/include/rz_util.h b/librz/include/rz_util.h
index efb802be707..c6ba827508e 100644
--- a/librz/include/rz_util.h
+++ b/librz/include/rz_util.h
@@ -6,7 +6,6 @@
 
 #include <rz_types.h>
 #include <rz_diff.h>
-#include <rz_regex.h>
 #include <rz_getopt.h>
 #include <rz_list.h> // rizin linked list
 #include <rz_skiplist.h> // skiplist
@@ -50,6 +49,7 @@
 #include "rz_util/rz_panels.h"
 #include "rz_util/rz_punycode.h"
 #include "rz_util/rz_range.h"
+#include "rz_util/rz_regex.h"
 #include "rz_util/rz_signal.h"
 #include "rz_util/rz_spaces.h"
 #include "rz_util/rz_stack.h"
diff --git a/librz/include/rz_util/rz_regex.h b/librz/include/rz_util/rz_regex.h
new file mode 100644
index 00000000000..8aeae28a9c1
--- /dev/null
+++ b/librz/include/rz_util/rz_regex.h
@@ -0,0 +1,82 @@
+// SPDX-FileCopyrightText: 2023 Rot127 <unisono@quyllur.org>
+// SPDX-License-Identifier: LGPL-3.0-only
+
+#ifndef RZ_REGEX_H
+#define RZ_REGEX_H
+
+#include <rz_util/rz_strbuf.h>
+#include <rz_vector.h>
+#include <rz_types.h>
+#include <rz_list.h>
+#include <sys/types.h>
+
+#define RZ_REGEX_SIZE size_t
+
+// Some basic PCRE2 macros. There are way more defined
+// and should be added here if needed.
+#define RZ_REGEX_ERROR_NOMATCH (-1) /* PCRE2_ERROR_NOMATCH */
+#define RZ_REGEX_ERROR_PARTIAL (-2) /* PCRE2_ERROR_PARTIAL */
+
+#define RZ_REGEX_DEFAULT       0
+#define RZ_REGEX_CASELESS      0x00000008u /* PCRE2_CASELESS */
+#define RZ_REGEX_EXTENDED      0x00000080u /* PCRE2_EXTENDED */
+#define RZ_REGEX_EXTENDED_MORE 0x01000000u /* PCRE2_EXTENDED_MORE */
+#define RZ_REGEX_MULTILINE     0x00000400u /* PCRE2_MULTILINE */
+
+#define RZ_REGEX_JIT_PARTIAL_SOFT 0x00000002u /* PCRE2_JIT_PARTIAL_SOFT */
+#define RZ_REGEX_JIT_PARTIAL_HARD 0x00000004u /* PCRE2_JIT_PARTIAL_HARD */
+
+#define RZ_REGEX_PARTIAL_SOFT 0x00000010u /* PCRE2_PARTIAL_SOFT */
+#define RZ_REGEX_PARTIAL_HARD 0x00000020u /* PCRE2_PARTIAL_HARD */
+
+#define RZ_REGEX_UNSET           (~(RZ_REGEX_SIZE)0) /* PCRE2_UNSET */
+#define RZ_REGEX_ZERO_TERMINATED (~(RZ_REGEX_SIZE)0) /* PCRE2_ZERO_TERMINATED */
+
+typedef int RzRegexStatus; ///< A status number returned by the regex API.
+typedef size_t RzRegexSize; ///< Size of a text or regex, measured in code units. For UTF-8: bytes.
+typedef ut32 RzRegexFlags; ///< Regex flag bits.
+typedef uint8_t *RzRegexPattern; ///< A regex pattern string.
+typedef void RzRegex; ///< A regex expression.
+
+typedef struct {
+	RzRegexSize group_idx; ///< Index of the group. Used to determine name if any was given.
+	RzRegexSize start; ///< Start offset into the text where the match starts.
+	RzRegexSize len; ///< Length of match in bytes.
+} RzRegexMatch;
+
+typedef void RzRegexMatchData; ///< PCRE2 internal match data type
+
+RZ_API RZ_OWN RzRegex *rz_regex_new(RZ_NONNULL const char *pattern, RzRegexFlags cflags, RzRegexFlags jflags);
+RZ_API void rz_regex_free(RZ_OWN RzRegex *regex);
+RZ_API void rz_regex_error_msg(RzRegexStatus errcode, RZ_OUT char *errbuf, RzRegexSize errbuf_size);
+RZ_API const ut8 *rz_regex_get_match_name(RZ_NONNULL const RzRegex *regex, ut32 name_idx);
+RZ_API RzRegexStatus rz_regex_match(RZ_NONNULL const RzRegex *regex, RZ_NONNULL const char *text,
+	RzRegexSize text_size,
+	RzRegexSize text_offset,
+	RzRegexFlags mflags);
+RZ_API RZ_OWN RzPVector /*<RzRegexMatch *>*/ *rz_regex_match_all_not_grouped(
+	RZ_NONNULL const RzRegex *regex,
+	RZ_NONNULL const char *text,
+	RzRegexSize text_size,
+	RzRegexSize text_offset,
+	RzRegexFlags mflags);
+RZ_API RZ_OWN RzPVector /*<RzRegexMatch *>*/ *rz_regex_match_first(
+	RZ_NONNULL const RzRegex *regex,
+	RZ_NONNULL const char *text,
+	RzRegexSize text_size,
+	RzRegexSize text_offset,
+	RzRegexFlags mflags);
+RZ_API RZ_OWN RzPVector /*<RzVector<RzRegexMatch *> *>*/ *rz_regex_match_all(
+	RZ_NONNULL const RzRegex *regex,
+	RZ_NONNULL const char *text,
+	RzRegexSize text_size,
+	RzRegexSize text_offset,
+	RzRegexFlags mflags);
+RZ_API bool rz_regex_contains(RZ_NONNULL const char *pattern, RZ_NONNULL const char *text,
+	RzRegexSize text_size,
+	RzRegexFlags cflags, RzRegexFlags mflags);
+RZ_API RZ_OWN RzStrBuf *rz_regex_full_match_str(RZ_NONNULL const char *pattern, RZ_NONNULL const char *text,
+	RzRegexSize text_size,
+	RzRegexFlags cflags, RzRegexFlags mflags, RZ_NONNULL const char *separator);
+
+#endif /* RZ_REGEX_H */
diff --git a/librz/magic/file.h b/librz/magic/file.h
index 92d3a0e7f97..92b6f499249 100644
--- a/librz/magic/file.h
+++ b/librz/magic/file.h
@@ -41,7 +41,7 @@
 #include <errno.h>
 #include <fcntl.h> /* For open and flags */
 #include <inttypes.h> // TODO: use utX
-#include <rz_regex.h>
+#include <rz_util/rz_regex.h>
 #include <sys/types.h>
 /* Do this here and now, because struct stat gets re-defined on solaris */
 #include <sys/stat.h>
diff --git a/librz/magic/softmagic.c b/librz/magic/softmagic.c
index fa5fd89c0cd..829428a1c99 100644
--- a/librz/magic/softmagic.c
+++ b/librz/magic/softmagic.c
@@ -34,7 +34,7 @@
 #if !USE_LIB_MAGIC
 
 #include "file.h"
-#include "rz_regex.h"
+#include <rz_util/rz_regex.h>
 #include <string.h>
 #include <ctype.h>
 #include <stdlib.h>
@@ -274,24 +274,17 @@ static int match(RzMagic *ms, struct rz_magic *magic, ut32 nmagic, const ut8 *s,
 }
 
 static int check_fmt(RzMagic *ms, struct rz_magic *m) {
-	RzRegex rx;
-	int rc;
-
 	if (!strchr(RZ_MAGIC_DESC, '%')) {
 		return 0;
 	}
 
-	rc = rz_regex_comp(&rx, "%[-0-9\\.]*s", RZ_REGEX_EXTENDED | RZ_REGEX_NOSUB);
-	if (rc) {
-		char errmsg[512];
-		rz_regex_error(rc, &rx, errmsg, sizeof(errmsg) - 1);
-		file_magerror(ms, "regex error %d, (%s)", rc, errmsg);
+	RzRegex *re = rz_regex_new("%[-0-9\\.]*s", RZ_REGEX_EXTENDED, 0); // looks for a "%...s" conversion in the description
+	if (!re) { // rz_regex_new() prints the compile error itself
 		return -1;
-	} else {
-		rc = rz_regex_exec(&rx, RZ_MAGIC_DESC, 0, 0, 0);
-		rz_regex_fini(&rx);
-		return !rc;
 	}
+	RzRegexStatus rc = rz_regex_match(re, RZ_MAGIC_DESC, RZ_REGEX_ZERO_TERMINATED, 0, RZ_REGEX_DEFAULT);
+	rz_regex_free(re);
+	return rc > 0 ? 1 : 0; // 1 only for a positive match status; no-match and match errors both yield 0
 }
 
 char *strdupn(const char *str, size_t n) {
@@ -1412,59 +1405,36 @@ static int magiccheck(RzMagic *ms, struct rz_magic *m) {
 		break;
 	}
 	case FILE_REGEX: {
-		int rc;
-		RzRegex rx;
-		char errmsg[512];
-
 		if (!ms->search.s) {
 			return 0;
 		}
 
 		l = 0;
-		rc = rz_regex_comp(&rx, m->value.s,
-			RZ_REGEX_EXTENDED | RZ_REGEX_NEWLINE |
-				((m->str_flags & STRING_IGNORE_CASE) ? RZ_REGEX_ICASE : 0));
-		if (rc) {
-			(void)rz_regex_error(rc, &rx, errmsg, sizeof(errmsg) - 1);
-			file_magerror(ms, "regex error %d, (%s)",
-				rc, errmsg);
-			v = (ut64)-1;
-		} else {
-			RzRegexMatch pmatch[1];
-#ifndef RZ_REGEX_STARTEND
-#define RZ_REGEX_STARTEND 0
-			size_t l = ms->search.s_len - 1;
-			char c = ms->search.s[l];
-			((char *)(intptr_t)ms->search.s)[l] = '\0';
-#else
-			pmatch[0].rm_so = 0;
-			pmatch[0].rm_eo = ms->search.s_len;
-#endif
-			rc = rz_regex_exec(&rx, (const char *)ms->search.s, 1, pmatch, RZ_REGEX_STARTEND);
-#if RZ_REGEX_STARTEND == 0
-			((char *)(intptr_t)ms->search.s)[l] = c;
-#endif
-			switch (rc) {
-			case 0:
-				ms->search.s += (int)pmatch[0].rm_so;
-				ms->search.offset += (size_t)pmatch[0].rm_so;
-				ms->search.rm_len = (size_t)(pmatch[0].rm_eo - pmatch[0].rm_so);
-				v = 0;
-				break;
-			case RZ_REGEX_NOMATCH:
-				v = 1;
-				break;
-			default:
-				(void)rz_regex_error(rc, &rx, errmsg, sizeof(errmsg) - 1);
-				file_magerror(ms, "regexec error %d, (%s)", rc, errmsg);
-				v = UT64_MAX;
-				break;
-			}
-			rz_regex_fini(&rx);
+		// MULTILINE keeps the per-line '^'/'$' anchoring the old RZ_REGEX_NEWLINE flag provided.
+		RzRegex *rx = rz_regex_new(m->value.s,
+			RZ_REGEX_EXTENDED | RZ_REGEX_MULTILINE |
+				((m->str_flags & STRING_IGNORE_CASE) ? RZ_REGEX_CASELESS : 0),
+			0);
+		if (!rx) {
+			return -1;
 		}
-		if (v == (ut64)-1) {
+		// The search buffer is bounded by s_len and is not guaranteed to be NUL-terminated.
+		RzPVector *matches = rz_regex_match_first(rx, (const char *)ms->search.s, ms->search.s_len, 0, RZ_REGEX_DEFAULT);
+		rz_regex_free(rx);
+		if (!matches) {
 			return -1;
+		} else if (rz_pvector_len(matches) == 0) {
+			// No match: release the empty result vector before leaving the case.
+			rz_pvector_free(matches);
+			v = 1;
+			break;
 		}
+		RzRegexMatch *hit = rz_pvector_head(matches); // whole-pattern match; named `hit` to avoid shadowing the `m` parameter
+		ms->search.s += (int)hit->start;
+		ms->search.offset += (size_t)hit->start;
+		ms->search.rm_len = (size_t)hit->len;
+		rz_pvector_free(matches);
+		v = 0;
 		break;
 	}
 	default:
diff --git a/librz/parse/filter.c b/librz/parse/filter.c
index fc774860e6a..040fdd57e88 100644
--- a/librz/parse/filter.c
+++ b/librz/parse/filter.c
@@ -4,7 +4,7 @@
 // SPDX-License-Identifier: LGPL-3.0-only
 
 #include "rz_util/rz_str.h"
-#include <rz_regex.h>
+#include <rz_util/rz_regex.h>
 #include <stdio.h>
 
 #include <rz_types.h>
@@ -179,7 +179,13 @@ static bool is_lea(const char *asm_str) {
 	if (!colored) {
 		return strlen(asm_str) > 4 && rz_str_startswith_icase(asm_str, "lea") && asm_str[3] == ' ';
 	}
-	return rz_regex_match("(^\x1b\\[[[:digit:]]{1,3}mlea\x1b\\[0m.+)", "ei", asm_str) != RZ_REGEX_NOMATCH;
+	RzRegex *re = rz_regex_new("(^\x1b\\[\\d{1,3}mlea\x1b\\[0m.+)", RZ_REGEX_EXTENDED | RZ_REGEX_CASELESS, 0);
+	if (!re) {
+		return false;
+	}
+	bool res = rz_regex_match(re, asm_str, RZ_REGEX_ZERO_TERMINATED, 0, RZ_REGEX_DEFAULT) > 0; // error statuses (< 0) must not count as a match
+	rz_regex_free(re);
+	return res;
 }
 
 static bool filter(RzParse *p, ut64 addr, RzFlag *f, RzAnalysisHint *hint, char *data, char *str, int len, bool big_endian) {
diff --git a/librz/parse/p/parse_arm_pseudo.c b/librz/parse/p/parse_arm_pseudo.c
index 30ca4b2b8b0..9c54178cc75 100644
--- a/librz/parse/p/parse_arm_pseudo.c
+++ b/librz/parse/p/parse_arm_pseudo.c
@@ -10,6 +10,8 @@
 #include <rz_flag.h>
 #include <rz_analysis.h>
 #include <rz_parse.h>
+#include <rz_util/rz_regex.h>
+#include <rz_vector.h>
 
 #include "parse_common.c"
 
@@ -266,44 +268,34 @@ static char *subvar_stack(RzParse *p, RzAnalysisOp *op, RZ_NULLABLE RzAnalysisFu
 		return tstr;
 	}
 
-	RzRegex var_re;
-	if (rz_regex_comp(&var_re, re_str, RZ_REGEX_EXTENDED | RZ_REGEX_ICASE) != 0) {
-		rz_regex_fini(&var_re);
+	RzRegex *var_re = rz_regex_new(re_str, RZ_REGEX_EXTENDED | RZ_REGEX_CASELESS, 0);
+	if (!var_re) {
 		return tstr;
 	}
-	RzRegexMatch match[4] = { 0 };
-	if (rz_regex_exec(&var_re, tstr, RZ_ARRAY_SIZE(match), match, 0) != 0) {
-		rz_regex_fini(&var_re);
+	RzPVector *matches = rz_regex_match_first(var_re, tstr, RZ_REGEX_ZERO_TERMINATED, 0, RZ_REGEX_DEFAULT);
+	if (!matches || rz_pvector_len(matches) < 2) { // no match, or group 1 (the register) is missing
+		rz_regex_free(var_re);
+		rz_pvector_free(matches);
 		return tstr;
 	}
-	for (size_t i = 0; i < RZ_ARRAY_SIZE(match); i++) {
-		char *s = rz_regex_match_extract(tstr, &match[i]);
-		free(s);
-	}
-	rz_regex_fini(&var_re);
+	rz_regex_free(var_re);
 
-	rz_return_val_if_fail(match[1].rm_so >= 0, tstr);
-	char *reg_str = rz_regex_match_extract(tstr, &match[1]);
+	RzRegexMatch *match = rz_pvector_at(matches, 1);
+	char *reg_str = rz_str_ndup(tstr + match->start, match->len);
 	if (!reg_str) {
+		rz_pvector_free(matches);
 		return tstr;
 	}
-	if (!rz_str_casecmp(reg_str, "x29")) {
-		free(reg_str);
-		reg_str = strdup("fp");
-	}
 
-	rz_return_val_if_fail(match[group_idx_addend].rm_so >= 0, tstr);
-	char *addend_str = rz_regex_match_extract(tstr, &match[group_idx_addend]);
-	if (!addend_str) {
-		free(reg_str);
-		return tstr;
-	}
+	rz_return_val_if_fail(rz_pvector_len(matches) > group_idx_addend, tstr); // index must be strictly below the length
+	match = rz_pvector_at(matches, group_idx_addend);
+	const char *addend_str = tstr + match->start;
 	st64 reg_addend = strtoll(addend_str, NULL, 0);
-	free(addend_str);
 
 	if (group_idx_sign >= 0) {
-		rz_return_val_if_fail(match[group_idx_sign].rm_so >= 0, tstr);
-		char sign = tstr[match[group_idx_sign].rm_so];
+		rz_return_val_if_fail(rz_pvector_len(matches) > group_idx_sign, tstr); // index must be strictly below the length
+		match = rz_pvector_at(matches, group_idx_sign);
+		char sign = tstr[match->start];
 		if (sign == '-') {
 			reg_addend = -reg_addend;
 		}
@@ -312,16 +304,18 @@ static char *subvar_stack(RzParse *p, RzAnalysisOp *op, RZ_NULLABLE RzAnalysisFu
 	char *varstr = p->var_expr_for_reg_access(f, addr, reg_str, reg_addend);
 	if (!varstr) {
 		free(reg_str);
+		rz_pvector_free(matches);
 		return tstr;
 	}
 
 	// replace!
-	size_t tail_len = strlen(tstr) - match[0].rm_eo;
+	RzRegexMatch *match_full = rz_pvector_at(matches, 0);
+	size_t tail_len = strlen(tstr) - (match_full->start + match_full->len);
 	RzStrBuf sb;
 	rz_strbuf_init(&sb);
 	// reserve with a bit of padding for brackets, reg, whitespace, ...
-	rz_strbuf_reserve(&sb, match[0].rm_so + strlen(varstr) + tail_len + 32);
-	rz_strbuf_append_n(&sb, tstr, match[0].rm_so);
+	rz_strbuf_reserve(&sb, match_full->start + strlen(varstr) + tail_len + 32);
+	rz_strbuf_append_n(&sb, tstr, match_full->start);
 	if (brackets) {
 		rz_strbuf_append(&sb, "[");
 	}
@@ -332,10 +326,11 @@ static char *subvar_stack(RzParse *p, RzAnalysisOp *op, RZ_NULLABLE RzAnalysisFu
 	if (brackets) {
 		rz_strbuf_append(&sb, "]");
 	}
-	rz_strbuf_append_n(&sb, tstr + match[0].rm_eo, tail_len);
+	rz_strbuf_append_n(&sb, tstr + match_full->start + match_full->len, tail_len);
 	free(reg_str);
 	free(varstr);
 	free(tstr);
+	rz_pvector_free(matches);
 	return rz_strbuf_drain_nofree(&sb);
 }
 
diff --git a/librz/parse/p/parse_mips_pseudo.c b/librz/parse/p/parse_mips_pseudo.c
index 5e188de7bc9..633bd0e16bd 100644
--- a/librz/parse/p/parse_mips_pseudo.c
+++ b/librz/parse/p/parse_mips_pseudo.c
@@ -13,6 +13,7 @@
 #include <rz_parse.h>
 
 #include "parse_common.c"
+#include <rz_util/rz_regex.h>
 
 static RzList /*<char *>*/ *mips_tokenize(const char *assembly, size_t length);
 
@@ -155,7 +156,7 @@ static char *subvar_stack(RzParse *p, RzAnalysisOp *op, RZ_NULLABLE RzAnalysisFu
 	if (!p->pseudo) {
 		// match e.g. -0x18(fp)
 		// capturing "-0x18", "0x", "fp"
-		re_str = "(-?(0x)?[0-9a-f]+)\\(([a-z][0-9a-z]))";
+		re_str = "(-?(0x)?[0-9a-f]+)\\(([a-z][0-9a-z])\\)";
 		group_idx_reg = 3;
 		group_idx_sign = -1;
 		group_idx_addend = 1;
@@ -168,39 +169,36 @@ static char *subvar_stack(RzParse *p, RzAnalysisOp *op, RZ_NULLABLE RzAnalysisFu
 		group_idx_addend = 3;
 	}
 
-	RzRegex var_re;
-	if (rz_regex_comp(&var_re, re_str, RZ_REGEX_EXTENDED | RZ_REGEX_ICASE) != 0) {
-		rz_regex_fini(&var_re);
+	RzRegex *var_re = rz_regex_new(re_str, RZ_REGEX_EXTENDED | RZ_REGEX_CASELESS, 0);
+	if (!var_re) {
 		return tstr;
 	}
-	RzRegexMatch match[4];
-	if (rz_regex_exec(&var_re, tstr, RZ_ARRAY_SIZE(match), match, 0) != 0) {
-		rz_regex_fini(&var_re);
+	RzPVector *matches = rz_regex_match_first(var_re, tstr, RZ_REGEX_ZERO_TERMINATED, 0, RZ_REGEX_DEFAULT);
+	if (!matches || rz_pvector_empty(matches)) {
+		rz_regex_free(var_re);
+		rz_pvector_free(matches);
 		return tstr;
 	}
-	for (size_t i = 0; i < RZ_ARRAY_SIZE(match); i++) {
-		char *s = rz_regex_match_extract(tstr, &match[i]);
-		free(s);
-	}
-	rz_regex_fini(&var_re);
+	rz_regex_free(var_re);
 
-	rz_return_val_if_fail(match[group_idx_reg].rm_so >= 0, tstr);
-	char *reg_str = rz_regex_match_extract(tstr, &match[group_idx_reg]);
+	rz_return_val_if_fail(rz_pvector_len(matches) > group_idx_reg, tstr);
+	RzRegexMatch *match = rz_pvector_at(matches, group_idx_reg);
+	char *reg_str = rz_str_ndup(tstr + match->start, match->len);
 	if (!reg_str) {
+		rz_pvector_free(matches);
 		return tstr;
 	}
 
-	char *addend_str = rz_regex_match_extract(tstr, &match[group_idx_addend]);
-	if (!addend_str) {
-		free(reg_str);
-		return tstr;
-	}
+	rz_return_val_if_fail(rz_pvector_len(matches) > group_idx_addend, tstr); // index must be strictly below the length
+	match = rz_pvector_at(matches, group_idx_addend);
+	const char *addend_str = tstr + match->start;
 	st64 reg_addend = strtoll(addend_str, NULL, 0);
-	free(addend_str);
 
 	if (group_idx_sign >= 0) {
-		rz_return_val_if_fail(match[group_idx_sign].rm_so >= 0, tstr);
-		if (tstr[match[group_idx_sign].rm_so] == '-') {
+		rz_return_val_if_fail(rz_pvector_len(matches) > group_idx_sign, tstr); // index must be strictly below the length
+		match = rz_pvector_at(matches, group_idx_sign);
+		char sign = tstr[match->start];
+		if (sign == '-') {
 			reg_addend = -reg_addend;
 		}
 	}
@@ -208,15 +206,18 @@ static char *subvar_stack(RzParse *p, RzAnalysisOp *op, RZ_NULLABLE RzAnalysisFu
 	char *varstr = p->var_expr_for_reg_access(f, addr, reg_str, reg_addend);
 	if (!varstr) {
 		free(reg_str);
+		rz_pvector_free(matches);
 		return tstr;
 	}
 
 	// information gathered, now perform the replacement in the string
-	size_t tail_len = strlen(tstr) - match[0].rm_eo;
+	RzRegexMatch *match_full = rz_pvector_at(matches, 0);
+	size_t tail_len = strlen(tstr) - (match_full->start + match_full->len);
 	RzStrBuf sb;
 	rz_strbuf_init(&sb);
-	rz_strbuf_reserve(&sb, match[0].rm_so + strlen(varstr) + tail_len + 32);
-	rz_strbuf_append_n(&sb, tstr, match[0].rm_so);
+	// reserve with a bit of padding for brackets, reg, whitespace, ...
+	rz_strbuf_reserve(&sb, match_full->start + strlen(varstr) + tail_len + 32);
+	rz_strbuf_append_n(&sb, tstr, match_full->start);
 	if (p->localvar_only) {
 		if (p->pseudo) {
 			rz_strbuf_append(&sb, varstr);
@@ -230,10 +231,11 @@ static char *subvar_stack(RzParse *p, RzAnalysisOp *op, RZ_NULLABLE RzAnalysisFu
 			rz_strbuf_appendf(&sb, "%s(%s)", varstr, reg_str);
 		}
 	}
-	rz_strbuf_append_n(&sb, tstr + match[0].rm_eo, tail_len);
+	rz_strbuf_append_n(&sb, tstr + match_full->start + match_full->len, tail_len);
 	free(reg_str);
 	free(varstr);
 	free(tstr);
+	rz_pvector_free(matches);
 	return rz_strbuf_drain_nofree(&sb);
 }
 
diff --git a/librz/parse/p/parse_x86_pseudo.c b/librz/parse/p/parse_x86_pseudo.c
index 2ed40721e2f..e4d316931bf 100644
--- a/librz/parse/p/parse_x86_pseudo.c
+++ b/librz/parse/p/parse_x86_pseudo.c
@@ -291,7 +291,7 @@ static bool parse(RzParse *p, const char *data, RzStrBuf *sb) {
 	return true;
 }
 
-static char *subvar_stack(RzParse *p, RzAnalysisOp *op, RZ_NULLABLE RzAnalysisFunction *f, char *tstr, bool att) {
+static char *subvar_stack(RzParse *p, RzAnalysisOp *op, RZ_NULLABLE RzAnalysisFunction *f, RZ_OWN char *tstr, bool att) {
 	const ut64 addr = op->addr;
 
 	if (!p->var_expr_for_reg_access || !f) {
@@ -305,7 +305,7 @@ static char *subvar_stack(RzParse *p, RzAnalysisOp *op, RZ_NULLABLE RzAnalysisFu
 	if (att) {
 		// match e.g. -0x18(%rbp)
 		// capturing "-0x18", "0x", "rbp"
-		re_str = "(-?(0x)?[0-9a-f]+)\\(%([re][0-9a-z][0-9a-z]))";
+		re_str = "(-?(0x)?[0-9a-f]+)\\(%([re][0-9a-z][0-9a-z])\\)";
 		group_idx_reg = 3;
 		group_idx_sign = -1;
 		group_idx_addend = 1;
@@ -318,46 +318,42 @@ static char *subvar_stack(RzParse *p, RzAnalysisOp *op, RZ_NULLABLE RzAnalysisFu
 		group_idx_addend = 3;
 	}
 
-	RzRegex var_re;
-	if (rz_regex_comp(&var_re, re_str, RZ_REGEX_EXTENDED | RZ_REGEX_ICASE) != 0) {
-		rz_regex_fini(&var_re);
+	RzRegex *var_re = rz_regex_new(re_str, RZ_REGEX_EXTENDED | RZ_REGEX_CASELESS, 0);
+	if (!var_re) {
 		return tstr;
 	}
-	RzRegexMatch match[4];
-	if (rz_regex_exec(&var_re, tstr, RZ_ARRAY_SIZE(match), match, 0) != 0) {
-		rz_regex_fini(&var_re);
+	RzPVector *matches = rz_regex_match_first(var_re, tstr, RZ_REGEX_ZERO_TERMINATED, 0, RZ_REGEX_DEFAULT);
+	if (!matches || rz_pvector_empty(matches)) {
+		rz_regex_free(var_re);
+		rz_pvector_free(matches);
 		return tstr;
 	}
-	for (size_t i = 0; i < RZ_ARRAY_SIZE(match); i++) {
-		char *s = rz_regex_match_extract(tstr, &match[i]);
-		free(s);
-	}
-	rz_regex_fini(&var_re);
+	rz_regex_free(var_re);
 
-	rz_return_val_if_fail(match[group_idx_reg].rm_so >= 0, tstr);
-	char *reg_str = rz_regex_match_extract(tstr, &match[group_idx_reg]);
+	rz_return_val_if_fail(rz_pvector_len(matches) > group_idx_reg, tstr);
+	RzRegexMatch *match = rz_pvector_at(matches, group_idx_reg);
+	char *reg_str = rz_str_ndup(tstr + match->start, match->len);
 	if (!reg_str) {
+		rz_pvector_free(matches);
 		return tstr;
 	}
 
-	char *addend_str = rz_regex_match_extract(tstr, &match[group_idx_addend]);
-	if (!addend_str) {
-		free(reg_str);
-		return tstr;
-	}
+	rz_return_val_if_fail(rz_pvector_len(matches) > group_idx_addend, tstr); // index must be strictly below the length
+	match = rz_pvector_at(matches, group_idx_addend);
+	const char *addend_str = tstr + match->start;
 	int base = 0;
-	size_t addend_len = strlen(addend_str);
+	size_t addend_len = match->len;
 	if (addend_len && (addend_str[addend_len - 1] == 'h' || addend_str[addend_len - 1] == 'H')) {
 		// MASM syntax prints hex numbers like `1234h`
-		addend_str[addend_len - 1] = '\0';
 		base = 16;
 	}
 	st64 reg_addend = strtoll(addend_str, NULL, base);
-	free(addend_str);
 
 	if (group_idx_sign >= 0) {
-		rz_return_val_if_fail(match[group_idx_sign].rm_so >= 0, tstr);
-		if (tstr[match[group_idx_sign].rm_so] == '-') {
+		rz_return_val_if_fail(rz_pvector_len(matches) > group_idx_sign, tstr); // index must be strictly below the length
+		match = rz_pvector_at(matches, group_idx_sign);
+		char sign = tstr[match->start];
+		if (sign == '-') {
 			reg_addend = -reg_addend;
 		}
 	}
@@ -365,15 +361,18 @@ static char *subvar_stack(RzParse *p, RzAnalysisOp *op, RZ_NULLABLE RzAnalysisFu
 	char *varstr = p->var_expr_for_reg_access(f, addr, reg_str, reg_addend);
 	if (!varstr) {
 		free(reg_str);
+		rz_pvector_free(matches);
 		return tstr;
 	}
 
 	// replace!
-	size_t tail_len = strlen(tstr) - match[0].rm_eo;
+	RzRegexMatch *match_full = rz_pvector_at(matches, 0);
+	size_t tail_len = strlen(tstr) - (match_full->start + match_full->len);
 	RzStrBuf sb;
 	rz_strbuf_init(&sb);
-	rz_strbuf_reserve(&sb, match[0].rm_so + strlen(varstr) + tail_len + 32);
-	rz_strbuf_append_n(&sb, tstr, match[0].rm_so);
+	// reserve with a bit of padding for brackets, reg, whitespace, ...
+	rz_strbuf_reserve(&sb, match_full->start + strlen(varstr) + tail_len + 32);
+	rz_strbuf_append_n(&sb, tstr, match_full->start);
 	if (!p->localvar_only && !att) {
 		rz_strbuf_appendf(&sb, "%s %c ", reg_str, reg_addend < 0 ? '-' : '+');
 	}
@@ -381,10 +380,11 @@ static char *subvar_stack(RzParse *p, RzAnalysisOp *op, RZ_NULLABLE RzAnalysisFu
 	if (!p->localvar_only && att) {
 		rz_strbuf_appendf(&sb, "(%%%s)", reg_str);
 	}
-	rz_strbuf_append_n(&sb, tstr + match[0].rm_eo, tail_len);
+	rz_strbuf_append_n(&sb, tstr + match_full->start + match_full->len, tail_len);
 	free(reg_str);
 	free(varstr);
 	free(tstr);
+	rz_pvector_free(matches);
 	return rz_strbuf_drain_nofree(&sb);
 }
 
diff --git a/librz/reg/profile.c b/librz/reg/profile.c
index 284d4452679..820b2835cbe 100644
--- a/librz/reg/profile.c
+++ b/librz/reg/profile.c
@@ -288,7 +288,7 @@ static bool parse_reg_profile_str(RZ_OUT RzList /*<RzRegProfileAlias *>*/ *alias
 			continue;
 		}
 		if (rz_str_strchr(line, "#")) {
-			RzList *line_and_cmt = rz_str_split_duplist_n_regex(line, "#", 0, true);
+			RzList *line_and_cmt = rz_str_split_duplist_n_regex(line, "\\#", 0, true);
 			char *raw_comment = strdup(rz_list_get_top(line_and_cmt));
 			if (!raw_comment) {
 				RZ_LOG_WARN("Comment could not be split from register definition. Line: \"%s\"\n", line);
@@ -299,11 +299,11 @@ static bool parse_reg_profile_str(RZ_OUT RzList /*<RzRegProfileAlias *>*/ *alias
 				RZ_LOG_WARN("Could not prepend # to comment. Line: \"%s\".\n", line);
 				continue;
 			}
-			toks = rz_str_split_duplist_n_regex(rz_list_get_bottom(line_and_cmt), "[[:blank:]]+", 0, true);
+			toks = rz_str_split_duplist_n_regex(rz_list_get_bottom(line_and_cmt), "\\s+", 0, true);
 			rz_list_append(toks, comment);
 			rz_list_free(line_and_cmt);
 		} else {
-			toks = rz_str_split_duplist_n_regex(line, "[[:blank:]]+", 0, true);
+			toks = rz_str_split_duplist_n_regex(line, "\\s+", 0, true);
 		}
 		ut32 toks_len = rz_list_length(toks);
 		if (rz_list_empty(toks)) {
diff --git a/librz/search/regexp.c b/librz/search/regexp.c
index 3139e314664..d35a5ae3c7b 100644
--- a/librz/search/regexp.c
+++ b/librz/search/regexp.c
@@ -3,48 +3,69 @@
 // SPDX-License-Identifier: LGPL-3.0-only
 
 #include "rz_search.h"
-#include <rz_regex.h>
+#include <rz_vector.h>
+#include <rz_util/rz_regex.h>
 
+/**
+ * \brief Matches every regex keyword of \p s against \p buf and registers a hit for each match.
+ *
+ * \param s The search context holding the keywords (s->kws) and the hit counter (s->nhits).
+ * \param from Address of the first byte of \p buf. Hit addresses are computed as \p from plus the match offset.
+ * \param buf The buffer to search in.
+ * \param len Length of \p buf in bytes.
+ *
+ * \return -1 on failure.
+ */
 RZ_API int rz_search_regexp_update(RzSearch *s, ut64 from, const ut8 *buf, int len) {
 	RzSearchKeyword *kw;
 	RzListIter *iter;
-	RzRegexMatch match;
-	RzRegex compiled = { 0 };
+	RzPVector *matches = NULL;
+	RzRegex *compiled = NULL;
 	const int old_nhits = s->nhits;
 	int ret = 0;
 
 	rz_list_foreach (s->kws, iter, kw) {
-		int reflags = RZ_REGEX_EXTENDED;
+		RzRegexFlags cflags = RZ_REGEX_EXTENDED;
 
 		if (kw->icase) {
-			reflags |= RZ_REGEX_ICASE;
+			cflags |= RZ_REGEX_CASELESS;
 		}
 
-		if (rz_regex_comp(&compiled, (char *)kw->bin_keyword, reflags)) {
+		compiled = rz_regex_new((char *)kw->bin_keyword, cflags, 0);
+		if (!compiled) {
 			eprintf("Cannot compile '%s' regexp\n", kw->bin_keyword);
 			return -1;
 		}
 
-		match.rm_so = 0;
-		match.rm_eo = len;
-
-		while (!rz_regex_exec(&compiled, (char *)buf, 1, &match, RZ_REGEX_STARTEND)) {
-			int t = rz_search_hit_new(s, kw, from + match.rm_so);
-			if (!t) {
+		// `from` is the address of buf[0], not an offset into buf: always scan from offset 0.
+		matches = rz_regex_match_all_not_grouped(compiled, (char *)buf, len, 0, RZ_REGEX_DEFAULT);
+		// The matches only store offsets and lengths; free the regex now so it is not leaked per keyword.
+		rz_regex_free(compiled);
+		compiled = NULL;
+		if (!matches) {
+			ret = -1;
+			goto beach;
+		}
+		void **it;
+		rz_pvector_foreach (matches, it) {
+			RzRegexMatch *m = *it;
+			int t = rz_search_hit_new(s, kw, from + m->start);
+			if (t == 0) {
 				ret = -1;
+				rz_pvector_free(matches);
 				goto beach;
 			}
+			// Max hits reached
 			if (t > 1) {
+				rz_pvector_free(matches);
 				goto beach;
 			}
-			/* Setup the boundaries for RZ_REGEX_STARTEND */
-			match.rm_so = match.rm_eo;
-			match.rm_eo = len;
 		}
+		rz_pvector_free(matches);
 	}
 
 beach:
-	rz_regex_fini(&compiled);
+	rz_regex_free(compiled);
 	if (!ret) {
 		ret = s->nhits - old_nhits;
 	}
diff --git a/librz/util/list.c b/librz/util/list.c
index de4aa2ddc05..405a3575614 100644
--- a/librz/util/list.c
+++ b/librz/util/list.c
@@ -827,6 +827,10 @@ RZ_API RZ_OWN RzList *rz_list_uniq(RZ_NONNULL const RzList *list, RZ_NONNULL RzL
 /**
  * \brief Casts a RzList containg strings into a concatenated string
  *
+ * \param list The list of strings to concatenate.
+ * \param ch The character used to separate the strings.
+ *
+ * \return The concatenated string.
  **/
 RZ_API RZ_OWN char *rz_list_to_str(RZ_NONNULL RzList *list, char ch) {
 	RzListIter *iter;
diff --git a/librz/util/meson.build b/librz/util/meson.build
index 8de72c48c32..eccbfa7f1fa 100644
--- a/librz/util/meson.build
+++ b/librz/util/meson.build
@@ -43,9 +43,7 @@ rz_util_common_sources = [
   'punycode.c',
   'range.c',
   'rbtree.c',
-  'regex/regcomp.c',
-  'regex/regerror.c',
-  'regex/regexec.c',
+  'regex.c',
   'serialize_spaces.c',
   'signal.c',
   'skiplist.c',
@@ -91,7 +89,7 @@ rz_util_common_sources = [
 ]
 rz_util_sources = rz_util_common_sources
 
-rz_util_deps = [ldl, lrt, mth, th, utl] + platform_deps
+rz_util_deps = [ldl, lrt, mth, th, utl, pcre2_dep] + platform_deps
 if zlib_dep.found()
   rz_util_deps += [zlib_dep]
 endif
@@ -149,7 +147,7 @@ if meson.is_cross_build()
       cc_native.find_library('psapi'),
     ]
   endif
-  rz_util_native_deps = [ldl_native, lrt_native, mth_native, th_native, utl_native] + platform_native_deps
+  rz_util_native_deps = [ldl_native, lrt_native, mth_native, th_native, utl_native, pcre2_dep] + platform_native_deps
   if execinfo_native.found()
     rz_util_native_deps += [execinfo_native]
   endif
diff --git a/librz/util/print.c b/librz/util/print.c
index fe3441ce732..69d426f69d4 100644
--- a/librz/util/print.c
+++ b/librz/util/print.c
@@ -4,7 +4,7 @@
 #include <ctype.h>
 #include <rz_util/rz_str.h>
 #include <rz_list.h>
-#include <rz_regex.h>
+#include <rz_util/rz_regex.h>
 #include <rz_types.h>
 #include <rz_util/rz_assert.h>
 #include <rz_util/rz_log.h>
diff --git a/librz/util/regex.c b/librz/util/regex.c
new file mode 100644
index 00000000000..bf85fded2e0
--- /dev/null
+++ b/librz/util/regex.c
@@ -0,0 +1,411 @@
+// SPDX-FileCopyrightText: 2023 Rot127 <unisono@quyllur.org>
+// SPDX-License-Identifier: LGPL-3.0-only
+
+#define PCRE2_STATIC
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
+
+#include <rz_util/rz_strbuf.h>
+#include <rz_vector.h>
+#include <rz_util/rz_regex.h>
+#include <rz_types.h>
+#include <rz_util/rz_assert.h>
+#include <rz_util.h>
+
+typedef pcre2_general_context RzRegexGeneralContext; ///< General context.
+typedef pcre2_compile_context RzRegexCompContext; ///< The context for compiling.
+typedef pcre2_match_context RzRegexMatchContext; ///< The context for matching.
+
+typedef struct {
+	RzRegexGeneralContext *general; ///< PCRE2 general context.
+	RzRegexCompContext *compile; ///< PCRE2 context for compiling.
+	RzRegexMatchContext *match; ///< PCRE2 context for matching.
+} RzRegexContexts; ///< Bundles one PCRE2 context of each kind.
+
+// Logs the PCRE2 message which belongs to err_num, together with the pattern and the error offset.
+static void print_pcre2_err(RZ_NULLABLE const char *pattern, RzRegexStatus err_num, size_t err_off) {
+	PCRE2_UCHAR buffer[256] = { 0 }; // Zeroed, so a failed message lookup never prints garbage.
+	pcre2_get_error_message(err_num, buffer, sizeof(buffer));
+	RZ_LOG_ERROR("Regex compilation for '%s' failed at %" PFMTSZu ": %s\n", pattern ? pattern : "(null)", err_off, buffer);
+}
+
+/**
+ * \brief Compile a regex pattern to a RzRegex and return it.
+ * In case of an error, an error message is printed and NULL is returned.
+ * With an extended flag set, the ' ' characters of \p pattern are textually rewritten before compiling.
+ * NOTE(review): this also hits escaped spaces and spaces in character classes. Confirm this is intended.
+ *
+ * \param pattern The regex pattern string.
+ * \param cflags The compilation flags or zero for default.
+ * \param jflags The compilation flags for the JIT compiler.
+ * You can pass RZ_REGEX_JIT_PARTIAL_SOFT or RZ_REGEX_JIT_PARTIAL_HARD if you
+ * intend to use the pattern for partial matching. Otherwise set it to 0.
+ *
+ * \return The compiled regex or NULL in case of failure.
+ */
+RZ_API RZ_OWN RzRegex *rz_regex_new(RZ_NONNULL const char *pattern, RzRegexFlags cflags, RzRegexFlags jflags) {
+	rz_return_val_if_fail(pattern, NULL);
+
+	RzRegexStatus err_num;
+	RzRegexSize err_off;
+	ut32 supported = 0;
+	pcre2_config(PCRE2_CONFIG_UNICODE, &supported);
+	if (supported != 1) {
+		RZ_LOG_ERROR("Unicode not supported by PCRE2 library.\n");
+		return NULL;
+	}
+	char *fixed_pat = NULL;
+	const char *pat = pattern;
+	if ((cflags & (RZ_REGEX_EXTENDED | RZ_REGEX_EXTENDED_MORE)) && strchr(pattern, ' ')) {
+		// In PCRE2 with the extended flag set, ascii space characters ' ' are skipped.
+		// We need to replace them with \s unfortunately to keep our API stable.
+		fixed_pat = rz_str_replace(strdup(pattern), " ", "\\s", 1);
+		if (!fixed_pat) {
+			RZ_LOG_ERROR("Failed to rewrite the spaces of the regex pattern '%s'.\n", pattern);
+			return NULL;
+		}
+		pat = fixed_pat;
+	}
+
+	RzRegex *regex = pcre2_compile(
+		(PCRE2_SPTR)pat,
+		PCRE2_ZERO_TERMINATED,
+		cflags | PCRE2_UTF | PCRE2_MATCH_INVALID_UTF,
+		&err_num,
+		&err_off,
+		NULL);
+	if (!regex) {
+		print_pcre2_err(pat, err_num, err_off);
+		free(fixed_pat);
+		return NULL;
+	}
+#ifdef SUPPORTS_PCRE2_JIT
+	RzRegexStatus jit_err = pcre2_jit_compile(regex, jflags | PCRE2_JIT_COMPLETE);
+	if (jit_err < 0) {
+		print_pcre2_err(pat, jit_err, 0);
+	}
+#endif
+	free(fixed_pat);
+	return regex;
+}
+
+/**
+ * \brief Frees a given RzRegex by passing it to pcre2_code_free().
+ * \param regex The RzRegex to free.
+ */
+RZ_API void rz_regex_free(RZ_OWN RzRegex *regex) {
+	pcre2_code_free(regex);
+}
+
+/** \brief Frees a RzRegexMatchData object by passing it to pcre2_match_data_free(). */
+static void rz_regex_match_data_free(RZ_OWN RzRegexMatchData *match_data) {
+	pcre2_match_data_free(match_data);
+}
+
+/**
+ * \brief Matches the \p regex in the \p text and returns only the status code of the attempt.
+ *
+ * \param regex The regex pattern to match.
+ * \param text The text to search in.
+ * \param text_size The length of the buffer pointed to by \p text.
+ * Can be set to RZ_REGEX_ZERO_TERMINATED if the buffer is a zero terminated string.
+ * \param text_offset The offset into \p text from where the search starts.
+ * \param mflags Match flags. PCRE2_NO_UTF_CHECK is always added to them.
+ *
+ * \return The status returned by pcre2_match(), or RZ_REGEX_ERROR_NOMATCH if \p regex or \p text is NULL.
+ */
+RZ_API RzRegexStatus rz_regex_match(RZ_NONNULL const RzRegex *regex, RZ_NONNULL const char *text,
+	RzRegexSize text_size,
+	RzRegexSize text_offset,
+	RzRegexFlags mflags) {
+	rz_return_val_if_fail(regex && text, RZ_REGEX_ERROR_NOMATCH);
+
+	pcre2_match_data *mdata = pcre2_match_data_create_from_pattern(regex, NULL);
+	RzRegexStatus rc = pcre2_match(regex, (PCRE2_SPTR)text, text_size, text_offset, mflags | PCRE2_NO_UTF_CHECK, mdata, NULL);
+	pcre2_match_data_free(mdata); // Only the status is returned, the offsets are discarded.
+	return rc;
+}
+
+/**
+ * \brief Writes the PCRE2 error message which belongs to \p errcode into \p errbuf.
+ *
+ * \param errcode The error code.
+ * \param errbuf The error message buffer.
+ * \param errbuf_size The error message buffer size in bytes.
+ */
+RZ_API void rz_regex_error_msg(RzRegexStatus errcode, RZ_OUT char *errbuf, RzRegexSize errbuf_size) {
+	pcre2_get_error_message(errcode, (PCRE2_UCHAR *)errbuf, errbuf_size); // The returned status is not checked.
+}
+
+/**
+ * \brief Returns the name of a group by searching the name table of the pattern.
+ *
+ * \param regex The regex expression with named groups.
+ * \param group_idx The index of the group to get the name for.
+ *
+ * \return The name of the group, or NULL in case of failure or if no name was set for it.
+ */
+RZ_API const ut8 *rz_regex_get_match_name(RZ_NONNULL const RzRegex *regex, ut32 group_idx) {
+	rz_return_val_if_fail(regex, NULL);
+
+	ut32 namecount;
+	ut32 name_entry_size;
+	PCRE2_SPTR nametable_ptr;
+
+	pcre2_pattern_info(
+		regex,
+		PCRE2_INFO_NAMECOUNT,
+		&namecount);
+
+	pcre2_pattern_info(
+		regex,
+		PCRE2_INFO_NAMETABLE,
+		&nametable_ptr);
+
+	pcre2_pattern_info(
+		regex,
+		PCRE2_INFO_NAMEENTRYSIZE,
+		&name_entry_size);
+
+	for (size_t i = 0; i < namecount; i++) {
+		int n = (nametable_ptr[0] << 8) | nametable_ptr[1]; // Group number: first two bytes, high byte first.
+		if (n == group_idx) {
+			return nametable_ptr + 2; // The name starts right after the group number.
+		}
+		nametable_ptr += name_entry_size; // Entries have a fixed size.
+	}
+	return NULL;
+}
+
+/**
+ * \brief Finds the first match in a text and returns it as a pvector.
+ * First element in the vector is always the whole match, the following possible groups.
+ * Groups which did not take part in the match have PCRE2_UNSET as start and a length of 0.
+ *
+ * \param regex The regex pattern to match.
+ * \param text The text to search in.
+ * \param text_size The length of the buffer pointed to by \p text.
+ * Can be set to RZ_REGEX_ZERO_TERMINATED if the buffer is a zero terminated string.
+ * \param text_offset The offset into \p text from where the search starts.
+ * \param mflags Match flags.
+ *
+ * \return The matches as pvector. NULL in case of failure. Empty for no matches or regex related errors.
+ * NOTE(review): the vector is created without an element free function, so rz_pvector_free()
+ * does not release the RzRegexMatch elements. Confirm that every caller frees them.
+ */
+RZ_API RZ_OWN RzPVector /*<RzRegexMatch *>*/ *rz_regex_match_first(
+	RZ_NONNULL const RzRegex *regex,
+	RZ_NONNULL const char *text,
+	RzRegexSize text_size,
+	RzRegexSize text_offset,
+	RzRegexFlags mflags) {
+	rz_return_val_if_fail(regex && text, NULL);
+
+	RzPVector *matches = rz_pvector_new(NULL);
+	RzRegexMatchData *mdata = pcre2_match_data_create_from_pattern(regex, NULL);
+	if (!matches || !mdata) {
+		RZ_LOG_ERROR("Failed to allocate the regex match objects.\n");
+		rz_pvector_free(matches);
+		rz_regex_match_data_free(mdata);
+		return NULL;
+	}
+
+	RzRegexStatus rc = pcre2_match(regex, (PCRE2_SPTR)text, text_size, text_offset, mflags | PCRE2_NO_UTF_CHECK, mdata, NULL);
+	if (rc == PCRE2_ERROR_NOMATCH) {
+		// Nothing matched return empty vector.
+		goto fini;
+	}
+
+	if (rc < 0) {
+		// Some error happened. Inform the user.
+		PCRE2_UCHAR buffer[256];
+		pcre2_get_error_message(rc, buffer, sizeof(buffer));
+		RZ_LOG_WARN("Regex matching failed: %s\n", buffer);
+		goto fini;
+	}
+
+	// Add groups to vector. PCRE2 returns one more than the highest numbered group that was set,
+	// so rc offset pairs are filled. Pair 0 is the whole match.
+	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(mdata);
+	for (size_t i = 0; i < (size_t)rc; i++) {
+		if (ovector[2 * i] > ovector[2 * i + 1]) {
+			// This happens for \K lookaround. We fail if used.
+			// See pcre2demo.c for details.
+			RZ_LOG_ERROR("Usage of \\K to set start of the pattern later than the end, is not implemented.\n");
+			goto fini;
+		}
+
+		// Offset and length of match
+		RzRegexMatch *match = RZ_NEW0(RzRegexMatch);
+		if (!match) {
+			// Out of memory. Return what was collected so far.
+			RZ_LOG_ERROR("Failed to allocate a regex match.\n");
+			goto fini;
+		}
+		match->start = ovector[2 * i];
+		match->len = ovector[2 * i + 1] - match->start;
+		match->group_idx = i;
+		rz_pvector_push(matches, match);
+	}
+
+fini:
+	rz_regex_match_data_free(mdata);
+	return matches;
+}
+
+/**
+ * \brief Finds all matches in a text and returns them as one flat vector.
+ * A whole match comes first, directly followed by its groups, then the next whole match.
+ *
+ * \param regex The regex pattern to match.
+ * \param text The text to search in.
+ * \param text_size The length of the buffer pointed to by \p text.
+ * Can be set to RZ_REGEX_ZERO_TERMINATED if the buffer is a zero terminated string.
+ * \param text_offset The offset into \p text from where the search starts.
+ * \param mflags Match flags.
+ *
+ * \return A vector of all matches or NULL in case of failure.
+ */
+RZ_API RZ_OWN RzPVector /*<RzRegexMatch *>*/ *rz_regex_match_all_not_grouped(
+	RZ_NONNULL const RzRegex *regex,
+	RZ_NONNULL const char *text,
+	RzRegexSize text_size,
+	RzRegexSize text_offset,
+	RzRegexFlags mflags) {
+	rz_return_val_if_fail(regex && text, NULL);
+
+	RzPVector *all_matches = rz_pvector_new(NULL);
+	RzPVector *matches = rz_regex_match_first(regex, text, text_size, text_offset, mflags);
+	while (matches && rz_pvector_len(matches) > 0) {
+		RzRegexMatch *whole_match = rz_pvector_head(matches);
+		text_offset = whole_match->start + whole_match->len;
+		// An empty match must not be accepted at the same offset again, or this loop never ends.
+		RzRegexFlags no_repeat = whole_match->len == 0 ? PCRE2_NOTEMPTY_ATSTART : 0;
+
+		size_t mlen = rz_pvector_len(matches);
+		for (size_t i = 0; i < mlen; ++i) {
+			RzRegexMatch *m = rz_pvector_pop_front(matches);
+			rz_pvector_push(all_matches, m);
+		}
+		rz_pvector_free(matches);
+		// Search again after the whole first match.
+		matches = rz_regex_match_first(regex, text, text_size, text_offset, mflags | no_repeat);
+	}
+
+	// Free last vector without matches.
+	rz_pvector_free(matches);
+	return all_matches;
+}
+
+/**
+ * \brief Finds all matches in a text and returns them as vector of vector matches.
+ *
+ * \param regex The regex pattern to match.
+ * \param text The text to search in.
+ * \param text_size The length of the buffer pointed to by \p text.
+ * Can be set to RZ_REGEX_ZERO_TERMINATED if the buffer is a zero terminated string.
+ * \param text_offset The offset into \p text from where the search starts.
+ * \param mflags Match flags.
+ * \return PVector with one pvector (whole match, then its groups) per match, or NULL in case of failure.
+ */
+RZ_API RZ_OWN RzPVector /*<RzVector<RzRegexMatch *> *>*/ *rz_regex_match_all(
+	RZ_NONNULL const RzRegex *regex,
+	RZ_NONNULL const char *text,
+	RzRegexSize text_size,
+	RzRegexSize text_offset,
+	RzRegexFlags mflags) {
+	rz_return_val_if_fail(regex && text, NULL);
+
+	RzPVector *all_matches = rz_pvector_new((RzPVectorFree)rz_pvector_free);
+	RzPVector *matches = rz_regex_match_first(regex, text, text_size, text_offset, mflags);
+	while (matches && rz_pvector_len(matches) > 0) {
+		rz_pvector_push(all_matches, matches);
+		RzRegexMatch *m = rz_pvector_head(matches);
+		// Search again after the last match.
+		text_offset = m->start + m->len;
+		// An empty match must not be accepted at the same offset again, or this loop never ends.
+		RzRegexFlags no_repeat = m->len == 0 ? PCRE2_NOTEMPTY_ATSTART : 0;
+		matches = rz_regex_match_first(regex, text, text_size, text_offset, mflags | no_repeat);
+	}
+
+	// Free last vector without matches.
+	rz_pvector_free(matches);
+	return all_matches;
+}
+
+/**
+ * \brief Checks if \p pattern can be found in \p text.
+ *
+ * \param pattern The regex pattern to match.
+ * \param text The text to search in.
+ * \param text_size The length of the buffer pointed to by \p text.
+ * Can be set to RZ_REGEX_ZERO_TERMINATED if the buffer is a zero terminated string.
+ * \param cflags Compile flags.
+ * \param mflags Match flags.
+ *
+ * \return true if the text contains the pattern.
+ * \return false Otherwise, also if the pattern fails to compile.
+ */
+RZ_API bool rz_regex_contains(RZ_NONNULL const char *pattern, RZ_NONNULL const char *text,
+	RzRegexSize text_size,
+	RzRegexFlags cflags, RzRegexFlags mflags) {
+	RzRegex *re = rz_regex_new(pattern, cflags, 0);
+	if (!re) {
+		return false;
+	}
+	RzPVector *matches = rz_regex_match_first(re, text, text_size, 0, mflags);
+	bool found = matches != NULL && !rz_pvector_empty(matches);
+	rz_pvector_free(matches);
+	rz_regex_free(re); // The pattern was compiled only for this lookup.
+	return found;
+}
+
+/**
+ * \brief Searches for a \p pattern in \p text and returns all matches as concatenated string.
+ * Only complete matches are concatenated. Sub-groups are skipped.
+ *
+ * \param pattern The regex pattern to match.
+ * \param text The text to search in.
+ * \param text_size The length of the buffer pointed to by \p text.
+ * Can be set to RZ_REGEX_ZERO_TERMINATED if the buffer is a zero terminated string.
+ * \param cflags Compile flags.
+ * \param mflags Match flags.
+ * \param separator A string to separate the matches.
+ * \return The concatenated matches. Empty if nothing matched or the pattern is invalid. NULL if an argument is NULL.
+ */
+RZ_API RZ_OWN RzStrBuf *rz_regex_full_match_str(RZ_NONNULL const char *pattern, RZ_NONNULL const char *text,
+	RzRegexSize text_size,
+	RzRegexFlags cflags, RzRegexFlags mflags, RZ_NONNULL const char *separator) {
+	rz_return_val_if_fail(pattern && text && separator, NULL);
+
+	RzRegex *re = rz_regex_new(pattern, cflags, 0);
+	RzStrBuf *sbuf = rz_strbuf_new("");
+	// Only search with a compiled pattern. rz_regex_new() logged the error otherwise.
+	RzPVector *matches = re ? rz_regex_match_all(re, text, text_size, 0, mflags) : NULL;
+	if (!matches || !sbuf) {
+		goto fini;
+	}
+
+	size_t i = 1;
+	void **m;
+	rz_pvector_foreach (matches, m) {
+		RzPVector *match_groups = *m;
+		RzRegexMatch *match = rz_pvector_head(match_groups);
+		const char *t = text + match->start;
+		if (((int)match->len) < 0) { // The length is passed as int precision below.
+			goto fini;
+		}
+		// No separator after the last match.
+		if (i == rz_pvector_len(matches)) {
+			rz_strbuf_appendf(sbuf, "%-.*s", (int)match->len, t);
+		} else if (!rz_strbuf_appendf(sbuf, "%-.*s%s", (int)match->len, t, separator)) {
+			goto fini;
+		}
+		++i;
+	}
+
+fini:
+	rz_pvector_free(matches);
+	rz_regex_free(re);
+	return sbuf;
+}
diff --git a/librz/util/regex/COPYRIGHT b/librz/util/regex/COPYRIGHT
deleted file mode 100644
index a6392fd37c3..00000000000
--- a/librz/util/regex/COPYRIGHT
+++ /dev/null
@@ -1,54 +0,0 @@
-$OpenBSD: COPYRIGHT,v 1.3 2003/06/02 20:18:36 millert Exp $
-
-Copyright 1992, 1993, 1994 Henry Spencer.  All rights reserved.
-This software is not subject to any license of the American Telephone
-and Telegraph Company or of the Regents of the University of California.
-
-Permission is granted to anyone to use this software for any purpose on
-any computer system, and to alter it and redistribute it, subject
-to the following restrictions:
-
-1. The author is not responsible for the consequences of use of this
-   software, no matter how awful, even if they arise from flaws in it.
-
-2. The origin of this software must not be misrepresented, either by
-   explicit claim or by omission.  Since few users ever read sources,
-   credits must appear in the documentation.
-
-3. Altered versions must be plainly marked as such, and must not be
-   misrepresented as being the original software.  Since few users
-   ever read sources, credits must appear in the documentation.
-
-4. This notice may not be removed or altered.
-
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-/*-
- * Copyright (c) 1994
- *	The Regents of the University of California.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)COPYRIGHT	8.1 (Berkeley) 3/16/94
- */
diff --git a/librz/util/regex/README b/librz/util/regex/README
deleted file mode 100644
index cc7acd629fd..00000000000
--- a/librz/util/regex/README
+++ /dev/null
@@ -1,5 +0,0 @@
-Based on the OpenBSD's regex implementation
-
-Modified to be portable (now compiles on windows, linux and *bsd including darwin)
-
-cvs -qd anoncvs@anoncvs.ca.openbsd.org:/cvs get -P src/lib/libc/regex
diff --git a/librz/util/regex/cclass.h b/librz/util/regex/cclass.h
deleted file mode 100644
index 00b46e37b62..00000000000
--- a/librz/util/regex/cclass.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*	$OpenBSD: cclass.h,v 1.5 2003/06/02 20:18:36 millert Exp $	*/
-
-/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
- * Copyright (c) 1992, 1993, 1994
- *	The Regents of the University of California.  All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)cclass.h	8.3 (Berkeley) 3/20/94
- */
-
-/* character-class table */
-static struct cclass {
-	char *name;
-	char *chars;
-	char *multis;
-} cclasses[] = {
-	{ "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789",
-		"" },
-	{ "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
-		"" },
-	{ "blank", " \t", "" },
-	{ "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
-\25\26\27\30\31\32\33\34\35\36\37\177",
-		"" },
-	{ "digit", "0123456789", "" },
-	{ "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
-		"" },
-	{ "lower", "abcdefghijklmnopqrstuvwxyz",
-		"" },
-	{ "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
-		"" },
-	{ "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
-		"" },
-	{ "space", "\t\n\v\f\r ", "" },
-	{ "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
-		"" },
-	{ "xdigit", "0123456789ABCDEFabcdef",
-		"" },
-	{ NULL, 0, "" }
-};
diff --git a/librz/util/regex/cname.h b/librz/util/regex/cname.h
deleted file mode 100644
index f17991aa285..00000000000
--- a/librz/util/regex/cname.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*	$OpenBSD: cname.h,v 1.5 2003/06/02 20:18:36 millert Exp $	*/
-
-/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
- * Copyright (c) 1992, 1993, 1994
- *	The Regents of the University of California.  All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)cname.h	8.3 (Berkeley) 3/20/94
- */
-
-/* character-name table */
-static struct cname {
-	char *name;
-	char code;
-} cnames[] = {
-	{ "NUL", '\0' },
-	{ "SOH", '\001' },
-	{ "STX", '\002' },
-	{ "ETX", '\003' },
-	{ "EOT", '\004' },
-	{ "ENQ", '\005' },
-	{ "ACK", '\006' },
-	{ "BEL", '\007' },
-	{ "alert", '\007' },
-	{ "BS", '\010' },
-	{ "backspace", '\b' },
-	{ "HT", '\011' },
-	{ "tab", '\t' },
-	{ "LF", '\012' },
-	{ "newline", '\n' },
-	{ "VT", '\013' },
-	{ "vertical-tab", '\v' },
-	{ "FF", '\014' },
-	{ "form-feed", '\f' },
-	{ "CR", '\015' },
-	{ "carriage-return", '\r' },
-	{ "SO", '\016' },
-	{ "SI", '\017' },
-	{ "DLE", '\020' },
-	{ "DC1", '\021' },
-	{ "DC2", '\022' },
-	{ "DC3", '\023' },
-	{ "DC4", '\024' },
-	{ "NAK", '\025' },
-	{ "SYN", '\026' },
-	{ "ETB", '\027' },
-	{ "CAN", '\030' },
-	{ "EM", '\031' },
-	{ "SUB", '\032' },
-	{ "ESC", '\033' },
-	{ "IS4", '\034' },
-	{ "FS", '\034' },
-	{ "IS3", '\035' },
-	{ "GS", '\035' },
-	{ "IS2", '\036' },
-	{ "RS", '\036' },
-	{ "IS1", '\037' },
-	{ "US", '\037' },
-	{ "space", ' ' },
-	{ "exclamation-mark", '!' },
-	{ "quotation-mark", '"' },
-	{ "number-sign", '#' },
-	{ "dollar-sign", '$' },
-	{ "percent-sign", '%' },
-	{ "ampersand", '&' },
-	{ "apostrophe", '\'' },
-	{ "left-parenthesis", '(' },
-	{ "right-parenthesis", ')' },
-	{ "asterisk", '*' },
-	{ "plus-sign", '+' },
-	{ "comma", ',' },
-	{ "hyphen", '-' },
-	{ "hyphen-minus", '-' },
-	{ "period", '.' },
-	{ "full-stop", '.' },
-	{ "slash", '/' },
-	{ "solidus", '/' },
-	{ "zero", '0' },
-	{ "one", '1' },
-	{ "two", '2' },
-	{ "three", '3' },
-	{ "four", '4' },
-	{ "five", '5' },
-	{ "six", '6' },
-	{ "seven", '7' },
-	{ "eight", '8' },
-	{ "nine", '9' },
-	{ "colon", ':' },
-	{ "semicolon", ';' },
-	{ "less-than-sign", '<' },
-	{ "equals-sign", '=' },
-	{ "greater-than-sign", '>' },
-	{ "question-mark", '?' },
-	{ "commercial-at", '@' },
-	{ "left-square-bracket", '[' },
-	{ "backslash", '\\' },
-	{ "reverse-solidus", '\\' },
-	{ "right-square-bracket", ']' },
-	{ "circumflex", '^' },
-	{ "circumflex-accent", '^' },
-	{ "underscore", '_' },
-	{ "low-line", '_' },
-	{ "grave-accent", '`' },
-	{ "left-brace", '{' },
-	{ "left-curly-bracket", '{' },
-	{ "vertical-line", '|' },
-	{ "right-brace", '}' },
-	{ "right-curly-bracket", '}' },
-	{ "tilde", '~' },
-	{ "DEL", '\177' },
-	{ NULL, 0 }
-};
diff --git a/librz/util/regex/engine.c b/librz/util/regex/engine.c
deleted file mode 100644
index 1615aeda364..00000000000
--- a/librz/util/regex/engine.c
+++ /dev/null
@@ -1,1076 +0,0 @@
-/*	$OpenBSD: engine.c,v 1.15 2005/08/05 13:03:00 espie Exp $	*/
-
-/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
- * Copyright (c) 1992, 1993, 1994
- *	The Regents of the University of California.  All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)engine.c	8.5 (Berkeley) 3/20/94
- */
-
-/*
- * The matching engine and friends.  This file is #included by regexec.c
- * after suitable #defines of a variety of macros used herein, so that
- * different state representations can be used without duplicating masses
- * of code.
- */
-
-#ifdef SNAMES
-#define matcher smatcher
-#define fast    sfast
-#define slow    sslow
-#define dissect sdissect
-#define backref sbackref
-#define step    sstep
-#define print   sprint
-#define at      sat
-#define match   smat
-#define nope    snope
-#endif
-#ifdef LNAMES
-#define matcher lmatcher
-#define fast    lfast
-#define slow    lslow
-#define dissect ldissect
-#define backref lbackref
-#define step    lstep
-#define print   lprint
-#define at      lat
-#define match   lmat
-#define nope    lnope
-#endif
-
-/* another structure passed up and down to avoid zillions of parameters */
-struct match {
-	struct re_guts *g;
-	int eflags;
-	RzRegexMatch *pmatch; /* [nsub+1] (0 element unused) */
-	char *offp; /* offsets work from here */
-	char *beginp; /* start of string -- virtual NUL precedes */
-	char *endp; /* end of string -- virtual NUL here */
-	char *coldp; /* can be no match starting before here */
-	char **lastpos; /* [nplus+1] */
-	STATEVARS;
-	states st; /* current states */
-	states fresh; /* states for a fresh start */
-	states tmp; /* temporary */
-	states empty; /* empty set of states */
-};
-
-static int matcher(struct re_guts *, char *, size_t, RzRegexMatch[], int);
-static char *dissect(struct match *, char *, char *, sopno, sopno);
-static char *backref(struct match *, char *, char *, sopno, sopno, sopno, int);
-static char *fast(struct match *, char *, char *, sopno, sopno);
-static char *slow(struct match *, char *, char *, sopno, sopno);
-static states step(struct re_guts *, sopno, sopno, states, int, states);
-#define MAX_RECURSION 100
-#define BOL           (OUT + 1)
-#define EOL           (BOL + 1)
-#define BOLEOL        (BOL + 2)
-#define NOTHING       (BOL + 3)
-#define BOW           (BOL + 4)
-#define EOW           (BOL + 5)
-#define CODEMAX       (BOL + 5) /* highest code used */
-#define NONCHAR(c)    ((c) > OUT)
-#define NNONCHAR      (CODEMAX - OUT)
-#ifdef REDEBUG
-static void print(struct match *, char *, states, int, FILE *);
-#endif
-#ifdef REDEBUG
-static void at(struct match *, char *, char *, char *, sopno, sopno);
-#endif
-#ifdef REDEBUG
-static char *pchar(int);
-#endif
-
-#ifdef REDEBUG
-#define SP(t, s, c)           print(m, t, s, c, stdout)
-#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
-#define NOTE(str) \
-	{ \
-		if (m->eflags & RZ_REGEX_TRACE) \
-			(void)printf("=%s\n", (str)); \
-	}
-static int nope = 0;
-#else
-#define SP(t, s, c)           /* nothing */
-#define AT(t, p1, p2, s1, s2) /* nothing */
-#define NOTE(s)               /* nothing */
-#endif
-
-/*
- - matcher - the actual matching engine
- */
-static int /* 0 success, RZ_REGEX_NOMATCH failure */
-matcher(struct re_guts *g, char *string, size_t nmatch, RzRegexMatch pmatch[],
-	int eflags) {
-	char *endp;
-	int i;
-	struct match mv;
-	struct match *m = &mv;
-	char *dp;
-	const sopno gf = g->firststate + 1; /* +1 for OEND */
-	const sopno gl = g->laststate;
-	char *start;
-	char *stop;
-
-	/* simplify the situation where possible */
-	if (g->cflags & RZ_REGEX_NOSUB)
-		nmatch = 0;
-	if (eflags & RZ_REGEX_STARTEND) {
-		start = string + pmatch[0].rm_so;
-		stop = string + pmatch[0].rm_eo;
-	} else {
-		start = string;
-		stop = start + strlen(start);
-	}
-	if (stop < start)
-		return (RZ_REGEX_INVARG);
-
-	/* prescreening; this does wonders for this rather slow code */
-	if (g->must != NULL) {
-		for (dp = start; dp < stop; dp++)
-			if (*dp == g->must[0] && stop - dp >= g->mlen &&
-				memcmp(dp, g->must, (size_t)g->mlen) == 0)
-				break;
-		if (dp == stop) /* we didn't find g->must */
-			return (RZ_REGEX_NOMATCH);
-	}
-
-	/* match struct setup */
-	m->g = g;
-	m->eflags = eflags;
-	m->pmatch = NULL;
-	m->lastpos = NULL;
-	m->offp = string;
-	m->beginp = start;
-	m->endp = stop;
-
-	if (m->g->nstates * 4 < m->g->nstates)
-		return RZ_REGEX_NOMATCH;
-	STATESETUP(m, 4);
-	SETUP(m->st);
-	SETUP(m->fresh);
-	SETUP(m->tmp);
-	SETUP(m->empty);
-	CLEAR(m->empty);
-
-	/* this loop does only one repetition except for backrefs */
-	for (;;) {
-		endp = fast(m, start, stop, gf, gl);
-		if (!endp) { /* a miss */
-			free(m->pmatch);
-			free(m->lastpos);
-			STATETEARDOWN(m);
-			return (RZ_REGEX_NOMATCH);
-		}
-		if (nmatch == 0 && !g->backrefs)
-			break; /* no further info needed */
-
-		/* where? */
-		if (!m->coldp) {
-			break;
-		}
-		for (;;) {
-			NOTE("finding start");
-			endp = slow(m, m->coldp, stop, gf, gl);
-			if (endp || m->coldp > m->endp) {
-				break;
-			}
-			m->coldp++;
-		}
-		if (nmatch == 1 && !g->backrefs)
-			break; /* no further info needed */
-
-		/* oh my, he wants the subexpressions... */
-		if (!m->pmatch) {
-			if ((m->g->nsub + 1) * sizeof(RzRegexMatch) < m->g->nsub) {
-				return RZ_REGEX_ESPACE;
-			}
-			m->pmatch = (RzRegexMatch *)malloc((m->g->nsub + 1) *
-				sizeof(RzRegexMatch));
-		}
-		if (!m->pmatch) {
-			STATETEARDOWN(m);
-			return (RZ_REGEX_ESPACE);
-		}
-		for (i = 1; i <= m->g->nsub; i++)
-			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
-		if (!g->backrefs && !(m->eflags & RZ_REGEX_BACKR)) {
-			NOTE("dissecting");
-			dp = dissect(m, m->coldp, endp, gf, gl);
-		} else {
-			if (g->nplus > 0 && !m->lastpos) {
-				if ((g->nplus + 1) * sizeof(char *) < g->nplus) {
-					free(m->pmatch);
-					STATETEARDOWN(m);
-					return RZ_REGEX_ESPACE;
-				}
-				m->lastpos = (char **)malloc((g->nplus + 1) *
-					sizeof(char *));
-			}
-			if (g->nplus > 0 && !m->lastpos) {
-				free(m->pmatch);
-				STATETEARDOWN(m);
-				return (RZ_REGEX_ESPACE);
-			}
-			NOTE("backref dissect");
-			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
-		}
-		if (dp) {
-			break;
-		}
-		/* uh-oh... we couldn't find a subexpression-level match */
-		if (!g->backrefs) { /* must be back references doing it */
-			break;
-		}
-		if (g->nplus || !m->lastpos) {
-			break;
-		}
-		for (;;) {
-			if (dp != NULL || endp <= m->coldp)
-				break; /* defeat */
-			NOTE("backoff");
-			endp = slow(m, m->coldp, endp - 1, gf, gl);
-			if (!endp)
-				break; /* defeat */
-				/* try it on a shorter possibility */
-#ifndef NDEBUG
-			for (i = 1; i <= m->g->nsub; i++) {
-				if (m->pmatch[i].rm_so != -1) {
-					break;
-				}
-				if (m->pmatch[i].rm_eo != -1) {
-					break;
-				}
-			}
-#endif
-			NOTE("backoff dissect");
-			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
-		}
-		if (dp != NULL || dp != endp) /* found a shorter one */
-			break;
-
-		/* despite initial appearances, there is no match here */
-		NOTE("false alarm");
-		if (m->coldp == stop)
-			break;
-		start = m->coldp + 1; /* recycle starting later */
-	}
-
-	/* fill in the details if requested */
-	if (nmatch > 0) {
-		pmatch[0].rm_so = m->coldp - m->offp;
-		pmatch[0].rm_eo = endp - m->offp;
-	}
-	if (nmatch > 1) {
-		if (m->pmatch) {
-			for (i = 1; i < nmatch; i++) {
-				if (i <= m->g->nsub) {
-					pmatch[i] = m->pmatch[i];
-				} else {
-					pmatch[i].rm_so = -1;
-					pmatch[i].rm_eo = -1;
-				}
-			}
-		}
-	}
-
-	if (m->pmatch != NULL)
-		free((char *)m->pmatch);
-	if (m->lastpos != NULL)
-		free((char *)m->lastpos);
-	STATETEARDOWN(m);
-	return (0);
-}
-
-/*
- - dissect - figure out what matched what, no back references
- */
-static char * /* == stop (success) always */
-dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst) {
-	int i;
-	sopno ss; /* start sop of current subRE */
-	sopno es; /* end sop of current subRE */
-	char *sp; /* start of string matched by it */
-	char *stp; /* string matched by it cannot pass here */
-	char *rest; /* start of rest of string */
-	char *tail; /* string unmatched by rest of RE */
-	sopno ssub; /* start sop of subsubRE */
-	sopno esub; /* end sop of subsubRE */
-	char *ssp; /* start of string matched by subsubRE */
-	char *sep; /* end of string matched by subsubRE */
-	char *oldssp; /* previous ssp */
-	char *dp;
-
-	AT("diss", start, stop, startst, stopst);
-	sp = start;
-	for (ss = startst; ss < stopst; ss = es) {
-		/* identify end of subRE */
-		es = ss;
-		switch (OP(m->g->strip[es])) {
-		case OPLUS_:
-		case OQUEST_:
-			es += OPND(m->g->strip[es]);
-			break;
-		case OCH_:
-			while (OP(m->g->strip[es]) != O_CH)
-				es += OPND(m->g->strip[es]);
-			break;
-		}
-		es++;
-
-		/* figure out what it matched */
-		switch (OP(m->g->strip[ss])) {
-		case OEND:
-			break;
-		case OCHAR:
-			sp++;
-			break;
-		case OBOL:
-		case OEOL:
-		case OBOW:
-		case OEOW:
-			break;
-		case OANY:
-		case OANYOF:
-			sp++;
-			break;
-		case OBACK_:
-		case O_BACK:
-			break;
-		/* cases where length of match is hard to find */
-		case OQUEST_:
-			stp = stop;
-			for (;;) {
-				/* how long could this one be? */
-				rest = slow(m, sp, stp, ss, es);
-				if (rest) { /* it did match */
-					/* could the rest match the rest? */
-					tail = slow(m, rest, stop, es, stopst);
-					if (tail == stop)
-						break; /* yes! */
-					/* no -- try a shorter match for this one */
-					stp = rest - 1;
-				}
-			}
-			ssub = ss + 1;
-			esub = es - 1;
-			/* did innards match? */
-			if (slow(m, sp, rest, ssub, esub) != NULL) {
-				dp = dissect(m, sp, rest, ssub, esub);
-				if (dp != rest)
-					return NULL;
-			} else if (sp != rest)
-				return NULL;
-			sp = rest;
-			break;
-		case OPLUS_:
-			stp = stop;
-			for (;;) {
-				/* how long could this one be? */
-				rest = slow(m, sp, stp, ss, es);
-				if (rest != NULL) { /* it did match */
-					/* could the rest match the rest? */
-					tail = slow(m, rest, stop, es, stopst);
-					if (tail == stop)
-						break; /* yes! */
-					/* no -- try a shorter match for this one */
-					stp = rest - 1;
-				}
-			}
-			ssub = ss + 1;
-			esub = es - 1;
-			ssp = sp;
-			oldssp = ssp;
-			for (;;) { /* find last match of innards */
-				sep = slow(m, ssp, rest, ssub, esub);
-				if (!sep || sep == ssp)
-					break; /* failed or matched null */
-				oldssp = ssp; /* on to next try */
-				ssp = sep;
-			}
-			if (!sep) {
-				/* last successful match */
-				sep = ssp;
-				ssp = oldssp;
-			}
-			if (sep == rest) { /* must exhaust substring */
-				if (slow(m, ssp, sep, ssub, esub) == rest) {
-					dp = dissect(m, ssp, sep, ssub, esub);
-					if (dp == sep) {
-						sp = rest;
-					}
-				}
-			}
-			break;
-		case OCH_:
-			stp = stop;
-			for (;;) {
-				/* how long could this one be? */
-				rest = slow(m, sp, stp, ss, es);
-				if (rest) { /* it did match */
-					/* could the rest match the rest? */
-					tail = slow(m, rest, stop, es, stopst);
-					if (tail == stop)
-						break; /* yes! */
-					/* no -- try a shorter match for this one */
-					stp = rest - 1;
-				}
-			}
-			ssub = ss + 1;
-			esub = ss + OPND(m->g->strip[ss]) - 1;
-			if (OP(m->g->strip[esub]) != OOR1) {
-				break;
-			}
-			for (;;) { /* find first matching branch */
-				if (slow(m, sp, rest, ssub, esub) == rest)
-					break; /* it matched all of it */
-				/* that one missed, try next one */
-				if (OP(m->g->strip[esub]) == OOR1) {
-					esub++;
-					if (OP(m->g->strip[esub]) == OOR2) {
-						ssub = esub + 1;
-						esub += OPND(m->g->strip[esub]);
-						if (OP(m->g->strip[esub]) == OOR2) {
-							esub--;
-						} else {
-							if (OP(m->g->strip[esub]) != O_CH) {
-								break;
-							}
-						}
-					}
-				}
-			}
-			dp = dissect(m, sp, rest, ssub, esub);
-			if (dp == rest) {
-				sp = rest;
-			}
-			break;
-		case O_PLUS:
-		case O_QUEST:
-		case OOR1:
-		case OOR2:
-		case O_CH:
-			break;
-		case OLPAREN:
-			i = OPND(m->g->strip[ss]);
-			if (i > 0 && i <= m->g->nsub) {
-				m->pmatch[i].rm_so = sp - m->offp;
-			}
-			break;
-		case ORPAREN:
-			i = OPND(m->g->strip[ss]);
-			if (i > 0 && i <= m->g->nsub) {
-				m->pmatch[i].rm_eo = sp - m->offp;
-			}
-			break;
-		default: /* uh oh */
-			break;
-		}
-	}
-
-	if (sp == stop) {
-		return sp;
-	} else {
-		return NULL;
-	}
-}
-
-/*
- - backref - figure out what matched what, figuring in back references
- */
-static char * /* == stop (success) or NULL (failure) */
-backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst,
-	sopno lev, int rec) /* PLUS nesting level */
-{
-	int i;
-	sopno ss; /* start sop of current subRE */
-	char *sp; /* start of string matched by it */
-	sopno ssub; /* start sop of subsubRE */
-	sopno esub; /* end sop of subsubRE */
-	char *ssp; /* start of string matched by subsubRE */
-	char *dp;
-	size_t len;
-	int hard;
-	sop s;
-	ut64 offsave;
-	cset *cs;
-
-	AT("back", start, stop, startst, stopst);
-	sp = start;
-
-	/* get as far as we can with easy stuff */
-	hard = 0;
-	for (ss = startst; !hard && ss < stopst; ss++)
-		switch (OP(s = m->g->strip[ss])) {
-		case OCHAR:
-			if (sp == stop || *sp++ != (char)OPND(s))
-				return (NULL);
-			break;
-		case OANY:
-			if (sp == stop)
-				return (NULL);
-			sp++;
-			break;
-		case OANYOF:
-			cs = &m->g->sets[OPND(s)];
-			if (sp == stop || !CHIN(cs, *sp++))
-				return (NULL);
-			break;
-		case OBOL:
-			if ((sp == m->beginp && !(m->eflags & RZ_REGEX_NOTBOL)) ||
-				(sp < m->endp && *(sp - 1) == '\n' &&
-					(m->g->cflags & RZ_REGEX_NEWLINE))) { /* yes */
-			} else
-				return (NULL);
-			break;
-		case OEOL:
-			if ((sp == m->endp && !(m->eflags & RZ_REGEX_NOTEOL)) ||
-				(sp < m->endp && *sp == '\n' &&
-					(m->g->cflags & RZ_REGEX_NEWLINE))) { /* yes */
-			} else
-				return (NULL);
-			break;
-		case OBOW:
-			if (((sp == m->beginp && !(m->eflags & RZ_REGEX_NOTBOL)) ||
-				    (sp < m->endp && *(sp - 1) == '\n' &&
-					    (m->g->cflags & RZ_REGEX_NEWLINE)) ||
-				    (sp > m->beginp &&
-					    !ISWORD((unsigned char)*(sp - 1)))) &&
-				(sp < m->endp && ISWORD((unsigned char)*sp))) { /* yes */
-			} else
-				return (NULL);
-			break;
-		case OEOW:
-			if (((sp == m->endp && !(m->eflags & RZ_REGEX_NOTEOL)) ||
-				    (sp < m->endp && *sp == '\n' &&
-					    (m->g->cflags & RZ_REGEX_NEWLINE)) ||
-				    (sp < m->endp && !ISWORD((unsigned char)*sp))) &&
-				(sp > m->beginp && ISWORD((unsigned char)*(sp - 1)))) { /* yes */
-			} else
-				return (NULL);
-			break;
-		case O_QUEST:
-			break;
-		case OOR1: /* matches null but needs to skip */
-			ss++;
-			s = m->g->strip[ss];
-			do {
-				if (OP(s) == OOR2) {
-					ss += OPND(s);
-				}
-			} while (OP(s = m->g->strip[ss]) != O_CH);
-			/* note that the ss++ gets us past the O_CH */
-			break;
-		default: /* have to make a choice */
-			hard = 1;
-			break;
-		}
-	if (!hard) { /* that was it! */
-		if (sp != stop)
-			return (NULL);
-		return (sp);
-	}
-	ss--; /* adjust for the for's final increment */
-
-	/* the hard stuff */
-	AT("hard", sp, stop, ss, stopst);
-	s = m->g->strip[ss];
-	switch (OP(s)) {
-	case OBACK_: /* the vilest depths */
-		i = OPND(s);
-		if (i > 0 && i <= m->g->nsub) {
-			if (m->pmatch[i].rm_eo == -1) {
-				return NULL;
-			}
-		}
-		if (m->pmatch[i].rm_so != -1) {
-			len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
-			if (len == 0 && rec++ > MAX_RECURSION)
-				return (NULL);
-			if (stop - m->beginp >= len) {
-				if (sp > stop - len) {
-					return (NULL); /* not enough left to match */
-				}
-			}
-			ssp = m->offp + m->pmatch[i].rm_so;
-			if (memcmp(sp, ssp, len) != 0)
-				return (NULL);
-			while (m->g->strip[ss] != SOP(O_BACK, i))
-				ss++;
-			return (backref(m, sp + len, stop, ss + 1, stopst, lev, rec));
-		}
-		break;
-	case OQUEST_: /* to null or not */
-		dp = backref(m, sp, stop, ss + 1, stopst, lev, rec);
-		if (dp != NULL)
-			return (dp); /* not */
-		return (backref(m, sp, stop, ss + OPND(s) + 1, stopst, lev, rec));
-		break;
-	case OPLUS_:
-		if (m->lastpos && (lev + 1 <= m->g->nplus)) {
-			m->lastpos[lev + 1] = sp;
-			return (backref(m, sp, stop, ss + 1, stopst, lev + 1, rec));
-		}
-		break;
-	case O_PLUS:
-		if (sp == m->lastpos[lev]) /* last pass matched null */
-			return (backref(m, sp, stop, ss + 1, stopst, lev - 1, rec));
-		/* try another pass */
-		m->lastpos[lev] = sp;
-		dp = backref(m, sp, stop, ss - OPND(s) + 1, stopst, lev, rec);
-		if (!dp)
-			return (backref(m, sp, stop, ss + 1, stopst, lev - 1, rec));
-		else
-			return (dp);
-		break;
-	case OCH_: /* find the right one, if any */
-		ssub = ss + 1;
-		esub = ss + OPND(s) - 1;
-		if (OP(m->g->strip[esub]) != OOR1) {
-			break;
-		}
-		for (;;) { /* find first matching branch */
-			dp = backref(m, sp, stop, ssub, esub, lev, rec);
-			if (dp != NULL)
-				return (dp);
-			/* that one missed, try next one */
-			if (OP(m->g->strip[esub]) == O_CH)
-				return (NULL); /* there is none */
-			esub++;
-			if (OP(m->g->strip[esub]) != OOR2) {
-				break;
-			}
-			ssub = esub + 1;
-			esub += OPND(m->g->strip[esub]);
-			if (OP(m->g->strip[esub]) == OOR2)
-				esub--;
-			else if (OP(m->g->strip[esub]) != O_CH) {
-				break;
-			}
-		}
-		break;
-	case OLPAREN: /* must undo assignment if rest fails */
-		i = OPND(s);
-		if (i > 0 && i <= m->g->nsub) {
-			offsave = m->pmatch[i].rm_so;
-			m->pmatch[i].rm_so = sp - m->offp;
-			dp = backref(m, sp, stop, ss + 1, stopst, lev, rec);
-			if (dp != NULL)
-				return (dp);
-			m->pmatch[i].rm_so = offsave;
-			return (NULL);
-		}
-		break;
-	case ORPAREN: /* must undo assignment if rest fails */
-		i = OPND(s);
-		if (i > 0 && i <= m->g->nsub) {
-			offsave = m->pmatch[i].rm_eo;
-			m->pmatch[i].rm_eo = sp - m->offp;
-			dp = backref(m, sp, stop, ss + 1, stopst, lev, rec);
-			if (dp != NULL)
-				return (dp);
-			m->pmatch[i].rm_eo = offsave;
-			return (NULL);
-		}
-		break;
-	default: /* uh oh */
-		break;
-	}
-
-	/* NOTREACHED */
-	return NULL;
-}
-
-/*
- - fast - step through the string at top speed
- */
-static char * /* where tentative match ended, or NULL */
-fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst) {
-	states st = m->st;
-	states fresh = m->fresh;
-	states tmp = m->tmp;
-	char *p = start;
-	int c = (start == m->beginp) ? OUT : *(start - 1);
-	int lastc; /* previous c */
-	int flagch;
-	int i;
-	char *coldp; /* last p after which no match was underway */
-
-	CLEAR(st);
-	SET1(st, startst);
-	st = step(m->g, startst, stopst, st, NOTHING, st);
-	ASSIGN(fresh, st);
-	SP("start", st, *p);
-	coldp = NULL;
-	for (;;) {
-		/* next character */
-		lastc = c;
-		c = (p == m->endp) ? OUT : *p;
-		if (EQ(st, fresh)) {
-			coldp = p;
-		}
-
-		/* is there an EOL and/or BOL between lastc and c? */
-		flagch = '\0';
-		i = 0;
-		if ((lastc == '\n' && m->g->cflags & RZ_REGEX_NEWLINE) ||
-			(lastc == OUT && !(m->eflags & RZ_REGEX_NOTBOL))) {
-			flagch = BOL;
-			i = m->g->nbol;
-		}
-		if ((c == '\n' && m->g->cflags & RZ_REGEX_NEWLINE) ||
-			(c == OUT && !(m->eflags & RZ_REGEX_NOTEOL))) {
-			flagch = (flagch == BOL) ? BOLEOL : EOL;
-			i += m->g->neol;
-		}
-		if (i != 0) {
-			for (; i > 0; i--)
-				st = step(m->g, startst, stopst, st, flagch, st);
-			SP("boleol", st, c);
-		}
-
-		/* how about a word boundary? */
-		if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
-			(c != OUT && ISWORD(c))) {
-			flagch = BOW;
-		}
-		if ((lastc != OUT && ISWORD(lastc)) &&
-			(flagch == EOL || (c != OUT && !ISWORD(c)))) {
-			flagch = EOW;
-		}
-		if (flagch == BOW || flagch == EOW) {
-			st = step(m->g, startst, stopst, st, flagch, st);
-			SP("boweow", st, c);
-		}
-
-		/* are we done? */
-		if (ISSET(st, stopst) || p == stop)
-			break; /* NOTE BREAK OUT */
-
-		/* no, we must deal with this character */
-		ASSIGN(tmp, st);
-		ASSIGN(st, fresh);
-		if (c == OUT) {
-			break;
-		}
-		st = step(m->g, startst, stopst, tmp, c, st);
-		SP("aft", st, c);
-		ASSIGN(tmp, st);
-		if (!EQ(step(m->g, startst, stopst, tmp, NOTHING, tmp), st)) {
-			break;
-		}
-		p++;
-	}
-
-	if (coldp) {
-		m->coldp = coldp;
-		if (ISSET(st, stopst))
-			return (p + 1);
-	}
-	return NULL;
-}
-
-/*
- - slow - step through the string more deliberately
- */
-static char * /* where it ended */
-slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst) {
-	states st = m->st;
-	states empty = m->empty;
-	states tmp = m->tmp;
-	char *p = start;
-	int c = (start == m->beginp) ? OUT : *(start - 1);
-	int lastc; /* previous c */
-	int flagch;
-	int i;
-	char *matchp; /* last p at which a match ended */
-
-	AT("slow", start, stop, startst, stopst);
-	CLEAR(st);
-	SET1(st, startst);
-	SP("sstart", st, *p);
-	st = step(m->g, startst, stopst, st, NOTHING, st);
-	matchp = NULL;
-	for (;;) {
-		/* next character */
-		lastc = c;
-		c = (p == m->endp) ? OUT : *p;
-
-		/* is there an EOL and/or BOL between lastc and c? */
-		flagch = '\0';
-		i = 0;
-		if ((lastc == '\n' && m->g->cflags & RZ_REGEX_NEWLINE) ||
-			(lastc == OUT && !(m->eflags & RZ_REGEX_NOTBOL))) {
-			flagch = BOL;
-			i = m->g->nbol;
-		}
-		if ((c == '\n' && m->g->cflags & RZ_REGEX_NEWLINE) ||
-			(c == OUT && !(m->eflags & RZ_REGEX_NOTEOL))) {
-			flagch = (flagch == BOL) ? BOLEOL : EOL;
-			i += m->g->neol;
-		}
-		if (i != 0) {
-			for (; i > 0; i--)
-				st = step(m->g, startst, stopst, st, flagch, st);
-			SP("sboleol", st, c);
-		}
-
-		/* how about a word boundary? */
-		if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
-			(c != OUT && ISWORD(c))) {
-			flagch = BOW;
-		}
-		if ((lastc != OUT && ISWORD(lastc)) &&
-			(flagch == EOL || (c != OUT && !ISWORD(c)))) {
-			flagch = EOW;
-		}
-		if (flagch == BOW || flagch == EOW) {
-			st = step(m->g, startst, stopst, st, flagch, st);
-			SP("sboweow", st, c);
-		}
-
-		/* are we done? */
-		if (ISSET(st, stopst))
-			matchp = p;
-		if (EQ(st, empty) || p == stop)
-			break; /* NOTE BREAK OUT */
-
-		/* no, we must deal with this character */
-		ASSIGN(tmp, st);
-		ASSIGN(st, empty);
-		if (c == OUT) {
-			break;
-		}
-		st = step(m->g, startst, stopst, tmp, c, st);
-		SP("saft", st, c);
-		if (!EQ(step(m->g, startst, stopst, st, NOTHING, st), st)) {
-			break;
-		}
-		p++;
-	}
-
-	return (matchp);
-}
-
-/*
- - step - map set of states reachable before char to set reachable after
- */
-static states
-step(struct re_guts *g,
-	sopno start, /* start state within strip */
-	sopno stop, /* state after stop state within strip */
-	states bef, /* states reachable before */
-	int ch, /* character or NONCHAR code */
-	states aft) /* states already known reachable after */
-{
-	cset *cs;
-	sop s;
-	sopno pc;
-	onestate here; /* note, macros know this name */
-	sopno look;
-	int i;
-
-	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
-		s = g->strip[pc];
-		switch (OP(s)) {
-		case OEND:
-			break;
-		case OCHAR:
-			/* only characters can match */
-			if (!NONCHAR(ch) || ch != (char)OPND(s)) {
-				if (ch == (char)OPND(s))
-					FWD(aft, bef, 1);
-			}
-			break;
-		case OBOL:
-			if (ch == BOL || ch == BOLEOL)
-				FWD(aft, bef, 1);
-			break;
-		case OEOL:
-			if (ch == EOL || ch == BOLEOL)
-				FWD(aft, bef, 1);
-			break;
-		case OBOW:
-			if (ch == BOW)
-				FWD(aft, bef, 1);
-			break;
-		case OEOW:
-			if (ch == EOW)
-				FWD(aft, bef, 1);
-			break;
-		case OANY:
-			if (!NONCHAR(ch))
-				FWD(aft, bef, 1);
-			break;
-		case OANYOF:
-			cs = &g->sets[OPND(s)];
-			if (!NONCHAR(ch) && CHIN(cs, ch))
-				FWD(aft, bef, 1);
-			break;
-		case OBACK_: /* ignored here */
-		case O_BACK:
-			FWD(aft, aft, 1);
-			break;
-		case OPLUS_: /* forward, this is just an empty */
-			FWD(aft, aft, 1);
-			break;
-		case O_PLUS: /* both forward and back */
-			FWD(aft, aft, 1);
-			i = ISSETBACK(aft, OPND(s));
-			BACK(aft, aft, OPND(s));
-			if (!i && ISSETBACK(aft, OPND(s))) {
-				/* oho, must reconsider loop body */
-				pc -= OPND(s) + 1;
-				INIT(here, pc);
-			}
-			break;
-		case OQUEST_: /* two branches, both forward */
-			FWD(aft, aft, 1);
-			FWD(aft, aft, OPND(s));
-			break;
-		case O_QUEST: /* just an empty */
-			FWD(aft, aft, 1);
-			break;
-		case OLPAREN: /* not significant here */
-		case ORPAREN:
-			FWD(aft, aft, 1);
-			break;
-		case OCH_: /* mark the first two branches */
-			FWD(aft, aft, 1);
-			if ((OP(g->strip[pc + OPND(s)]) != OOR2)) {
-				break;
-			}
-			FWD(aft, aft, OPND(s));
-			break;
-		case OOR1: /* done a branch, find the O_CH */
-			if (ISSTATEIN(aft, here)) {
-				for (look = 1;
-					OP(s = g->strip[pc + look]) != O_CH;
-					look += OPND(s)) {
-					if (OP(s) != OOR2) {
-						break;
-					}
-				}
-				FWD(aft, aft, look);
-			}
-			break;
-		case OOR2: /* propagate OCH_'s marking */
-			FWD(aft, aft, 1);
-			if (OP(g->strip[pc + OPND(s)]) != O_CH) {
-				if (OP(g->strip[pc + OPND(s)]) == OOR2) {
-					FWD(aft, aft, OPND(s));
-				}
-			}
-			break;
-		case O_CH: /* just empty */
-			FWD(aft, aft, 1);
-			break;
-		default: /* ooooops... */
-			eprintf("ops in regex.c\n");
-			break;
-		}
-	}
-
-	return (aft);
-}
-
-#ifdef REDEBUG
-/*
- - print - print a set of states
- */
-static void
-print(struct match *m, char *caption, states st, int ch, FILE *d) {
-	struct re_guts *g = m->g;
-	int i;
-	int first = 1;
-
-	if (!(m->eflags & RZ_REGEX_TRACE))
-		return;
-
-	(void)fprintf(d, "%s", caption);
-	if (ch != '\0')
-		(void)fprintf(d, " %s", pchar(ch));
-	for (i = 0; i < g->nstates; i++)
-		if (ISSET(st, i)) {
-			(void)fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
-			first = 0;
-		}
-	(void)fprintf(d, "\n");
-}
-
-/*
- - at - print current situation
- */
-static void
-at(struct match *m, char *title, char *start, char *stop, sopno startst,
-	sopno stopst) {
-	if (!(m->eflags & RZ_REGEX_TRACE))
-		return;
-
-	(void)printf("%s %s-", title, pchar(*start));
-	(void)printf("%s ", pchar(*stop));
-	(void)printf("%ld-%ld\n", (long)startst, (long)stopst);
-}
-
-#ifndef PCHARDONE
-#define PCHARDONE /* never again */
-/*
- - pchar - make a character printable
- *
- * Is this identical to regchar() over in debug.c?  Well, yes.  But a
- * duplicate here avoids having a debugging-capable regexec.o tied to
- * a matching debug.o, and this is convenient.  It all disappears in
- * the non-debug compilation anyway, so it doesn't matter much.
- */
-static char * /* -> representation */
-pchar(int ch) {
-	static char pbuf[10];
-
-	if (isprint((ut8)ch) || ch == ' ')
-		(void)snprintf(pbuf, sizeof pbuf, "%c", ch);
-	else
-		(void)snprintf(pbuf, sizeof pbuf, "\\%o", ch);
-	return (pbuf);
-}
-#endif
-#endif
-
-#undef matcher
-#undef fast
-#undef slow
-#undef dissect
-#undef backref
-#undef step
-#undef print
-#undef at
-#undef match
-#undef nope
diff --git a/librz/util/regex/re_format.7 b/librz/util/regex/re_format.7
deleted file mode 100644
index 72887175209..00000000000
--- a/librz/util/regex/re_format.7
+++ /dev/null
@@ -1,756 +0,0 @@
-.\"	$OpenBSD: re_format.7,v 1.15 2010/07/15 20:51:38 schwarze Exp $
-.\"
-.\" Copyright (c) 1997, Phillip F Knaack. All rights reserved.
-.\"
-.\" Copyright (c) 1992, 1993, 1994 Henry Spencer.
-.\" Copyright (c) 1992, 1993, 1994
-.\"	The Regents of the University of California.  All rights reserved.
-.\"
-.\" This code is derived from software contributed to Berkeley by
-.\" Henry Spencer.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\"    notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\"    notice, this list of conditions and the following disclaimer in the
-.\"    documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\"    may be used to endorse or promote products derived from this software
-.\"    without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\"	@(#)re_format.7	8.3 (Berkeley) 3/20/94
-.\"
-.Dd $Mdocdate: July 15 2010 $
-.Dt RE_FORMAT 7
-.Os
-.Sh NAME
-.Nm re_format
-.Nd POSIX regular expressions
-.Sh DESCRIPTION
-Regular expressions (REs),
-as defined in
-.St -p1003.1-2004 ,
-come in two forms:
-basic regular expressions
-(BREs)
-and extended regular expressions
-(EREs).
-Both forms of regular expressions are supported
-by the interfaces described in
-.Xr regex 3 .
-Applications dealing with regular expressions
-may use one or the other form
-(or indeed both).
-For example,
-.Xr ed 1
-uses BREs,
-whilst
-.Xr egrep 1
-talks EREs.
-Consult the manual page for the specific application to find out which
-it uses.
-.Pp
-POSIX leaves some aspects of RE syntax and semantics open;
-.Sq **
-marks decisions on these aspects that
-may not be fully portable to other POSIX implementations.
-.Pp
-This manual page first describes regular expressions in general,
-specifically extended regular expressions,
-and then discusses differences between them and basic regular expressions.
-.Sh EXTENDED REGULAR EXPRESSIONS
-An ERE is one** or more non-empty**
-.Em branches ,
-separated by
-.Sq \*(Ba .
-It matches anything that matches one of the branches.
-.Pp
-A branch is one** or more
-.Em pieces ,
-concatenated.
-It matches a match for the first, followed by a match for the second, etc.
-.Pp
-A piece is an
-.Em atom
-possibly followed by a single**
-.Sq * ,
-.Sq + ,
-.Sq ?\& ,
-or
-.Em bound .
-An atom followed by
-.Sq *
-matches a sequence of 0 or more matches of the atom.
-An atom followed by
-.Sq +
-matches a sequence of 1 or more matches of the atom.
-An atom followed by
-.Sq ?\&
-matches a sequence of 0 or 1 matches of the atom.
-.Pp
-A bound is
-.Sq {
-followed by an unsigned decimal integer,
-possibly followed by
-.Sq ,\&
-possibly followed by another unsigned decimal integer,
-always followed by
-.Sq } .
-The integers must lie between 0 and
-.Dv RE_DUP_MAX
-(255**) inclusive,
-and if there are two of them, the first may not exceed the second.
-An atom followed by a bound containing one integer
-.Ar i
-and no comma matches
-a sequence of exactly
-.Ar i
-matches of the atom.
-An atom followed by a bound
-containing one integer
-.Ar i
-and a comma matches
-a sequence of
-.Ar i
-or more matches of the atom.
-An atom followed by a bound
-containing two integers
-.Ar i
-and
-.Ar j
-matches a sequence of
-.Ar i
-through
-.Ar j
-(inclusive) matches of the atom.
-.Pp
-An atom is a regular expression enclosed in
-.Sq ()
-(matching a part of the regular expression),
-an empty set of
-.Sq ()
-(matching the null string)**,
-a
-.Em bracket expression
-(see below),
-.Sq .\&
-(matching any single character),
-.Sq ^
-(matching the null string at the beginning of a line),
-.Sq $
-(matching the null string at the end of a line),
-a
-.Sq \e
-followed by one of the characters
-.Sq ^.[$()|*+?{\e
-(matching that character taken as an ordinary character),
-a
-.Sq \e
-followed by any other character**
-(matching that character taken as an ordinary character,
-as if the
-.Sq \e
-had not been present**),
-or a single character with no other significance (matching that character).
-A
-.Sq {
-followed by a character other than a digit is an ordinary character,
-not the beginning of a bound**.
-It is illegal to end an RE with
-.Sq \e .
-.Pp
-A bracket expression is a list of characters enclosed in
-.Sq [] .
-It normally matches any single character from the list (but see below).
-If the list begins with
-.Sq ^ ,
-it matches any single character
-.Em not
-from the rest of the list
-(but see below).
-If two characters in the list are separated by
-.Sq - ,
-this is shorthand for the full
-.Em range
-of characters between those two (inclusive) in the
-collating sequence, e.g.\&
-.Sq [0-9]
-in ASCII matches any decimal digit.
-It is illegal** for two ranges to share an endpoint, e.g.\&
-.Sq a-c-e .
-Ranges are very collating-sequence-dependent,
-and portable programs should avoid relying on them.
-.Pp
-To include a literal
-.Sq ]\&
-in the list, make it the first character
-(following a possible
-.Sq ^ ) .
-To include a literal
-.Sq - ,
-make it the first or last character,
-or the second endpoint of a range.
-To use a literal
-.Sq -
-as the first endpoint of a range,
-enclose it in
-.Sq [.
-and
-.Sq .]
-to make it a collating element (see below).
-With the exception of these and some combinations using
-.Sq \&[
-(see next paragraphs),
-all other special characters, including
-.Sq \e ,
-lose their special significance within a bracket expression.
-.Pp
-Within a bracket expression, a collating element
-(a character,
-a multi-character sequence that collates as if it were a single character,
-or a collating-sequence name for either)
-enclosed in
-.Sq [.
-and
-.Sq .]
-stands for the sequence of characters of that collating element.
-The sequence is a single element of the bracket expression's list.
-A bracket expression containing a multi-character collating element
-can thus match more than one character,
-e.g. if the collating sequence includes a
-.Sq ch
-collating element,
-then the RE
-.Sq [[.ch.]]*c
-matches the first five characters of
-.Sq chchcc .
-.Pp
-Within a bracket expression, a collating element enclosed in
-.Sq [=
-and
-.Sq =]
-is an equivalence class, standing for the sequences of characters
-of all collating elements equivalent to that one, including itself.
-(If there are no other equivalent collating elements,
-the treatment is as if the enclosing delimiters were
-.Sq [.
-and
-.Sq .] . )
-For example, if
-.Sq x
-and
-.Sq y
-are the members of an equivalence class,
-then
-.Sq [[=x=]] ,
-.Sq [[=y=]] ,
-and
-.Sq [xy]
-are all synonymous.
-An equivalence class may not** be an endpoint of a range.
-.Pp
-Within a bracket expression, the name of a
-.Em character class
-enclosed
-in
-.Sq [:
-and
-.Sq :]
-stands for the list of all characters belonging to that class.
-Standard character class names are:
-.Bd -literal -offset indent
-alnum	digit	punct
-alpha	graph	space
-blank	lower	upper
-cntrl	print	xdigit
-.Ed
-.Pp
-These stand for the character classes defined in
-.Xr ctype 3 .
-A locale may provide others.
-A character class may not be used as an endpoint of a range.
-.Pp
-There are two special cases** of bracket expressions:
-the bracket expressions
-.Sq [[:<:]]
-and
-.Sq [[:>:]]
-match the null string at the beginning and end of a word, respectively.
-A word is defined as a sequence of
-characters starting and ending with a word character
-which is neither preceded nor followed by
-word characters.
-A word character is an
-.Em alnum
-character (as defined by
-.Xr ctype 3 )
-or an underscore.
-This is an extension,
-compatible with but not specified by POSIX,
-and should be used with
-caution in software intended to be portable to other systems.
-.Pp
-In the event that an RE could match more than one substring of a given
-string,
-the RE matches the one starting earliest in the string.
-If the RE could match more than one substring starting at that point,
-it matches the longest.
-Subexpressions also match the longest possible substrings, subject to
-the constraint that the whole match be as long as possible,
-with subexpressions starting earlier in the RE taking priority over
-ones starting later.
-Note that higher-level subexpressions thus take priority over
-their lower-level component subexpressions.
-.Pp
-Match lengths are measured in characters, not collating elements.
-A null string is considered longer than no match at all.
-For example,
-.Sq bb*
-matches the three middle characters of
-.Sq abbbc ;
-.Sq (wee|week)(knights|nights)
-matches all ten characters of
-.Sq weeknights ;
-when
-.Sq (.*).*
-is matched against
-.Sq abc ,
-the parenthesized subexpression matches all three characters;
-and when
-.Sq (a*)*
-is matched against
-.Sq bc ,
-both the whole RE and the parenthesized subexpression match the null string.
-.Pp
-If case-independent matching is specified,
-the effect is much as if all case distinctions had vanished from the
-alphabet.
-When an alphabetic that exists in multiple cases appears as an
-ordinary character outside a bracket expression, it is effectively
-transformed into a bracket expression containing both cases,
-e.g.\&
-.Sq x
-becomes
-.Sq [xX] .
-When it appears inside a bracket expression,
-all case counterparts of it are added to the bracket expression,
-so that, for example,
-.Sq [x]
-becomes
-.Sq [xX]
-and
-.Sq [^x]
-becomes
-.Sq [^xX] .
-.Pp
-No particular limit is imposed on the length of REs**.
-Programs intended to be portable should not employ REs longer
-than 256 bytes,
-as an implementation can refuse to accept such REs and remain
-POSIX-compliant.
-.Pp
-The following is a list of extended regular expressions:
-.Bl -tag -width Ds
-.It Ar c
-Any character
-.Ar c
-not listed below matches itself.
-.It \e Ns Ar c
-Any backslash-escaped character
-.Ar c
-matches itself.
-.It \&.
-Matches any single character that is not a newline
-.Pq Sq \en .
-.It Bq Ar char-class
-Matches any single character in
-.Ar char-class .
-To include a
-.Ql \&]
-in
-.Ar char-class ,
-it must be the first character.
-A range of characters may be specified by separating the end characters
-of the range with a
-.Ql - ;
-e.g.\&
-.Ar a-z
-specifies the lower case characters.
-The following literal expressions can also be used in
-.Ar char-class
-to specify sets of characters:
-.Bd -unfilled -offset indent
-[:alnum:] [:cntrl:] [:lower:] [:space:]
-[:alpha:] [:digit:] [:print:] [:upper:]
-[:blank:] [:graph:] [:punct:] [:xdigit:]
-.Ed
-.Pp
-If
-.Ql -
-appears as the first or last character of
-.Ar char-class ,
-then it matches itself.
-All other characters in
-.Ar char-class
-match themselves.
-.Pp
-Patterns in
-.Ar char-class
-of the form
-.Eo [.
-.Ar col-elm
-.Ec .]\&
-or
-.Eo [=
-.Ar col-elm
-.Ec =]\& ,
-where
-.Ar col-elm
-is a collating element, are interpreted according to
-.Xr setlocale 3
-.Pq not currently supported .
-.It Bq ^ Ns Ar char-class
-Matches any single character, other than newline, not in
-.Ar char-class .
-.Ar char-class
-is defined as above.
-.It ^
-If
-.Sq ^
-is the first character of a regular expression, then it
-anchors the regular expression to the beginning of a line.
-Otherwise, it matches itself.
-.It $
-If
-.Sq $
-is the last character of a regular expression,
-it anchors the regular expression to the end of a line.
-Otherwise, it matches itself.
-.It [[:<:]]
-Anchors the single character regular expression or subexpression
-immediately following it to the beginning of a word.
-.It [[:>:]]
-Anchors the single character regular expression or subexpression
-immediately following it to the end of a word.
-.It Pq Ar re
-Defines a subexpression
-.Ar re .
-Any set of characters enclosed in parentheses
-matches whatever the set of characters without parentheses matches
-(that is a long-winded way of saying the constructs
-.Sq (re)
-and
-.Sq re
-match identically).
-.It *
-Matches the single character regular expression or subexpression
-immediately preceding it zero or more times.
-If
-.Sq *
-is the first character of a regular expression or subexpression,
-then it matches itself.
-The
-.Sq *
-operator sometimes yields unexpected results.
-For example, the regular expression
-.Ar b*
-matches the beginning of the string
-.Qq abbb
-(as opposed to the substring
-.Qq bbb ) ,
-since a null match is the only leftmost match.
-.It +
-Matches the singular character regular expression
-or subexpression immediately preceding it
-one or more times.
-.It ?
-Matches the singular character regular expression
-or subexpression immediately preceding it
-0 or 1 times.
-.Sm off
-.It Xo
-.Pf { Ar n , m No }\ \&
-.Pf { Ar n , No }\ \&
-.Pf { Ar n No }
-.Xc
-.Sm on
-Matches the single character regular expression or subexpression
-immediately preceding it at least
-.Ar n
-and at most
-.Ar m
-times.
-If
-.Ar m
-is omitted, then it matches at least
-.Ar n
-times.
-If the comma is also omitted, then it matches exactly
-.Ar n
-times.
-.It \*(Ba
-Used to separate patterns.
-For example,
-the pattern
-.Sq cat\*(Badog
-matches either
-.Sq cat
-or
-.Sq dog .
-.El
-.Sh BASIC REGULAR EXPRESSIONS
-Basic regular expressions differ in several respects:
-.Bl -bullet -offset 3n
-.It
-.Sq \*(Ba ,
-.Sq + ,
-and
-.Sq ?\&
-are ordinary characters and there is no equivalent
-for their functionality.
-.It
-The delimiters for bounds are
-.Sq \e{
-and
-.Sq \e} ,
-with
-.Sq {
-and
-.Sq }
-by themselves ordinary characters.
-.It
-The parentheses for nested subexpressions are
-.Sq \e(
-and
-.Sq \e) ,
-with
-.Sq \&(
-and
-.Sq )\&
-by themselves ordinary characters.
-.It
-.Sq ^
-is an ordinary character except at the beginning of the
-RE or** the beginning of a parenthesized subexpression.
-.It
-.Sq $
-is an ordinary character except at the end of the
-RE or** the end of a parenthesized subexpression.
-.It
-.Sq *
-is an ordinary character if it appears at the beginning of the
-RE or the beginning of a parenthesized subexpression
-(after a possible leading
-.Sq ^ ) .
-.It
-Finally, there is one new type of atom, a
-.Em back-reference :
-.Sq \e
-followed by a non-zero decimal digit
-.Ar d
-matches the same sequence of characters matched by the
-.Ar d Ns th
-parenthesized subexpression
-(numbering subexpressions by the positions of their opening parentheses,
-left to right),
-so that, for example,
-.Sq \e([bc]\e)\e1
-matches
-.Sq bb\&
-or
-.Sq cc
-but not
-.Sq bc .
-.El
-.Pp
-The following is a list of basic regular expressions:
-.Bl -tag -width Ds
-.It Ar c
-Any character
-.Ar c
-not listed below matches itself.
-.It \e Ns Ar c
-Any backslash-escaped character
-.Ar c ,
-except for
-.Sq { ,
-.Sq } ,
-.Sq \&( ,
-and
-.Sq \&) ,
-matches itself.
-.It \&.
-Matches any single character that is not a newline
-.Pq Sq \en .
-.It Bq Ar char-class
-Matches any single character in
-.Ar char-class .
-To include a
-.Ql \&]
-in
-.Ar char-class ,
-it must be the first character.
-A range of characters may be specified by separating the end characters
-of the range with a
-.Ql - ;
-e.g.\&
-.Ar a-z
-specifies the lower case characters.
-The following literal expressions can also be used in
-.Ar char-class
-to specify sets of characters:
-.Bd -unfilled -offset indent
-[:alnum:] [:cntrl:] [:lower:] [:space:]
-[:alpha:] [:digit:] [:print:] [:upper:]
-[:blank:] [:graph:] [:punct:] [:xdigit:]
-.Ed
-.Pp
-If
-.Ql -
-appears as the first or last character of
-.Ar char-class ,
-then it matches itself.
-All other characters in
-.Ar char-class
-match themselves.
-.Pp
-Patterns in
-.Ar char-class
-of the form
-.Eo [.
-.Ar col-elm
-.Ec .]\&
-or
-.Eo [=
-.Ar col-elm
-.Ec =]\& ,
-where
-.Ar col-elm
-is a collating element, are interpreted according to
-.Xr setlocale 3
-.Pq not currently supported .
-.It Bq ^ Ns Ar char-class
-Matches any single character, other than newline, not in
-.Ar char-class .
-.Ar char-class
-is defined as above.
-.It ^
-If
-.Sq ^
-is the first character of a regular expression, then it
-anchors the regular expression to the beginning of a line.
-Otherwise, it matches itself.
-.It $
-If
-.Sq $
-is the last character of a regular expression,
-it anchors the regular expression to the end of a line.
-Otherwise, it matches itself.
-.It [[:<:]]
-Anchors the single character regular expression or subexpression
-immediately following it to the beginning of a word.
-.It [[:>:]]
-Anchors the single character regular expression or subexpression
-immediately following it to the end of a word.
-.It \e( Ns Ar re Ns \e)
-Defines a subexpression
-.Ar re .
-Subexpressions may be nested.
-A subsequent backreference of the form
-.Pf \e Ns Ar n ,
-where
-.Ar n
-is a number in the range [1,9], expands to the text matched by the
-.Ar n Ns th
-subexpression.
-For example, the regular expression
-.Ar \e(.*\e)\e1
-matches any string consisting of identical adjacent substrings.
-Subexpressions are ordered relative to their left delimiter.
-.It *
-Matches the single character regular expression or subexpression
-immediately preceding it zero or more times.
-If
-.Sq *
-is the first character of a regular expression or subexpression,
-then it matches itself.
-The
-.Sq *
-operator sometimes yields unexpected results.
-For example, the regular expression
-.Ar b*
-matches the beginning of the string
-.Qq abbb
-(as opposed to the substring
-.Qq bbb ) ,
-since a null match is the only leftmost match.
-.Sm off
-.It Xo
-.Pf \e{ Ar n , m No \e}\ \&
-.Pf \e{ Ar n , No \e}\ \&
-.Pf \e{ Ar n No \e}
-.Xc
-.Sm on
-Matches the single character regular expression or subexpression
-immediately preceding it at least
-.Ar n
-and at most
-.Ar m
-times.
-If
-.Ar m
-is omitted, then it matches at least
-.Ar n
-times.
-If the comma is also omitted, then it matches exactly
-.Ar n
-times.
-.El
-.Sh SEE ALSO
-.Xr ctype 3 ,
-.Xr regex 3
-.Sh STANDARDS
-.St -p1003.1-2004 :
-Base Definitions, Chapter 9 (Regular Expressions).
-.Sh BUGS
-Having two kinds of REs is a botch.
-.Pp
-The current POSIX spec says that
-.Sq )\&
-is an ordinary character in the absence of an unmatched
-.Sq \&( ;
-this was an unintentional result of a wording error,
-and change is likely.
-Avoid relying on it.
-.Pp
-Back-references are a dreadful botch,
-posing major problems for efficient implementations.
-They are also somewhat vaguely defined
-(does
-.Sq a\e(\e(b\e)*\e2\e)*d
-match
-.Sq abbbd ? ) .
-Avoid using them.
-.Pp
-POSIX's specification of case-independent matching is vague.
-The
-.Dq one case implies all cases
-definition given above
-is the current consensus among implementors as to the right interpretation.
-.Pp
-The syntax for word boundaries is incredibly ugly.
diff --git a/librz/util/regex/regcomp.c b/librz/util/regex/regcomp.c
deleted file mode 100644
index 0962d3c24d6..00000000000
--- a/librz/util/regex/regcomp.c
+++ /dev/null
@@ -1,1786 +0,0 @@
-/*	$OpenBSD: regcomp.c,v 1.20 2010/11/21 00:02:30 tedu Exp $ */
-/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
- * Copyright (c) 1992, 1993, 1994
- *	The Regents of the University of California.  All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)regcomp.c	8.5 (Berkeley) 3/20/94
- */
-
-#include <sys/types.h>
-#include <stdio.h>
-#include <string.h>
-#include <ctype.h>
-#include <limits.h>
-#include <stdlib.h>
-#include "rz_regex.h"
-#include "rz_util/rz_str.h"
-#include "rz_util/rz_assert.h"
-
-#include "utils.h"
-#include "regex2.h"
-
-#include "cclass.h"
-#include "cname.h"
-
-/*
- * parse structure, passed up and down to avoid global variables and
- * other clumsinesses
- */
-struct parse {
-	char *next; /* next character in RE */
-	char *end; /* end of string (-> NUL normally) */
-	int error; /* has an error been seen? */
-	sop *strip; /* malloced strip */
-	sopno ssize; /* malloced strip size (allocated) */
-	sopno slen; /* malloced strip length (used) */
-	int ncsalloc; /* number of csets allocated */
-	struct re_guts *g;
-#define NPAREN 10 /* we need to remember () 1-9 for back refs */
-	sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
-	sopno pend[NPAREN]; /* -> ) ([0] unused) */
-};
-
-static void p_ere(struct parse *, int);
-static void p_ere_exp(struct parse *);
-static void p_str(struct parse *);
-static void p_bre(struct parse *, int, int);
-static int p_simp_re(struct parse *, int);
-static int p_count(struct parse *);
-static void p_bracket(struct parse *);
-static void p_b_term(struct parse *, cset *);
-static void p_b_cclass(struct parse *, cset *);
-static void p_b_eclass(struct parse *, cset *);
-static char p_b_symbol(struct parse *);
-static char p_b_coll_elem(struct parse *, int);
-static char othercase(int);
-static void bothcases(struct parse *, int);
-static void ordinary(struct parse *, int);
-static void special(struct parse *, int);
-static void nonnewline(struct parse *);
-static void repeat(struct parse *, sopno, int, int);
-static int seterr(struct parse *, int);
-static cset *allocset(struct parse *);
-static void freeset(struct parse *, cset *);
-static int freezeset(struct parse *, cset *);
-static int firstch(struct parse *, cset *);
-static int nch(struct parse *, cset *);
-static void mcadd(struct parse *, cset *, char *);
-static void mcinvert(struct parse *, cset *);
-static void mccase(struct parse *, cset *);
-static int isinsets(struct re_guts *, int);
-static int samesets(struct re_guts *, int, int);
-static void categorize(struct parse *, struct re_guts *);
-static sopno dupl(struct parse *, sopno, sopno);
-static void doemit(struct parse *, sop, size_t);
-static void doinsert(struct parse *, sop, size_t, sopno);
-static void dofwd(struct parse *, sopno, sop);
-static void enlarge(struct parse *, sopno);
-static void stripsnug(struct parse *, struct re_guts *);
-static void findmust(struct parse *, struct re_guts *);
-static sopno pluscount(struct parse *, struct re_guts *);
-
-static char nuls[10]; /* place to point scanner in event of error */
-
-/*
- * macros for use with parse structure
- * BEWARE:  these know that the parse structure is named `p' !!!
- */
-#define PEEK()           (*p->next)
-#define PEEK2()          (*(p->next + 1))
-#define MORE()           (p->next < p->end)
-#define MORE2()          (p->next + 1 < p->end)
-#define SEE(c)           (MORE() && PEEK() == (c))
-#define SEETWO(a, b)     (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
-#define EAT(c)           ((SEE(c)) ? (NEXT(), 1) : 0)
-#define EATTWO(a, b)     ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
-#define NEXT()           (p->next++)
-#define NEXT2()          (p->next += 2)
-#define NEXTn(n)         (p->next += (n))
-#define GETNEXT()        (*p->next++)
-#define SETERROR(e)      seterr(p, (e))
-#define REQUIRE(co, e)   (void)((co) || SETERROR(e))
-#define MUSTSEE(c, e)    (REQUIRE(MORE() && PEEK() == (c), e))
-#define MUSTEAT(c, e)    (REQUIRE(MORE() && GETNEXT() == (c), e))
-#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))
-#define EMIT(op, sopnd)  doemit(p, (sop)(op), (size_t)(sopnd))
-#define INSERT(op, pos)  doinsert(p, (sop)(op), HERE() - (pos) + 1, pos)
-#define AHEAD(pos)       dofwd(p, pos, HERE() - (pos))
-#define ASTERN(sop, pos) EMIT(sop, HERE() - (pos))
-#define HERE()           (p->slen)
-#define THERE()          (p->slen - 1)
-#define THERETHERE()     (p->slen - 2)
-#define DROP(n)          (p->slen -= (n))
-
-RZ_API int rz_regex_match(const char *pattern, const char *flags, const char *text) {
-	int ret;
-	RzRegex rx;
-	int re_flags = rz_regex_flags(flags);
-	if (rz_regex_comp(&rx, pattern, re_flags)) {
-		eprintf("FAIL TO COMPILE %s\n", pattern);
-		return 0;
-	}
-	ret = rz_regex_exec(&rx, text, 0, 0, re_flags);
-	rz_regex_fini(&rx);
-	return ret ? 0 : 1;
-}
-
-/**
- * Extract the string matched by given regex match
- *
- * \param str must be the exact string \p match was originally matched from
- * \param match a match pointing into \p str, may be -1/-1 (not found) in which case NULL will be returned
- * \return a heap-allocated string representing the contents of \p match or NULL if unmatched
- */
-RZ_API char *rz_regex_match_extract(RZ_NONNULL const char *str, RZ_NONNULL RzRegexMatch *match) {
-	rz_return_val_if_fail(str && match, NULL);
-	if (match->rm_eo < 0 || match->rm_so < 0) {
-		return NULL;
-	}
-	size_t entry_len = match->rm_eo - match->rm_so + 1;
-	char *r = RZ_NEWS0(char, entry_len);
-	if (!r) {
-		return NULL;
-	}
-	rz_str_ncpy(r, str + match->rm_so, entry_len);
-	return r;
-}
-
-RZ_API RzList /*<char *>*/ *rz_regex_get_match_list(const char *pattern, const char *flags, const char *text) {
-	RzList *list = rz_list_newf(free);
-	RzRegex rx;
-	RzRegexMatch match;
-	int re_flags = rz_regex_flags(flags);
-	if (rz_regex_comp(&rx, pattern, re_flags)) {
-		eprintf("Failed to compile regexp: %s\n", pattern);
-		return NULL;
-	}
-
-	/* Initialize the boundaries for RZ_REGEX_STARTEND */
-	match.rm_so = 0;
-	match.rm_eo = strlen(text);
-	while (!rz_regex_exec(&rx, text, 1, &match, re_flags | RZ_REGEX_STARTEND)) {
-		char *entry = rz_regex_match_extract(text, &match);
-		if (entry) {
-			rz_list_append(list, entry);
-		}
-		/* Update the boundaries for RZ_REGEX_STARTEND */
-		match.rm_so = match.rm_eo;
-		match.rm_eo = strlen(text);
-	}
-	rz_regex_fini(&rx);
-	return list;
-}
-
-RZ_API RzRegex *rz_regex_new(const char *pattern, const char *flags) {
-	rz_return_val_if_fail(pattern, NULL);
-	RzRegex *r, rx = { 0 };
-	if (rz_regex_comp(&rx, pattern, rz_regex_flags(flags))) {
-		return NULL;
-	}
-	r = RZ_NEW(RzRegex);
-	if (!r) {
-		return NULL;
-	}
-	memcpy(r, &rx, sizeof(RzRegex));
-	return r;
-}
-
-RZ_API int rz_regex_flags(const char *f) {
-	int flags = 0;
-	if (!f || !*f) {
-		return 0;
-	}
-	if (strchr(f, 'e')) {
-		flags |= RZ_REGEX_EXTENDED;
-	}
-	if (strchr(f, 'i')) {
-		flags |= RZ_REGEX_ICASE;
-	}
-	if (strchr(f, 's')) {
-		flags |= RZ_REGEX_NOSUB;
-	}
-	if (strchr(f, 'n')) {
-		flags |= RZ_REGEX_NEWLINE;
-	}
-	if (strchr(f, 'N')) {
-		flags |= RZ_REGEX_NOSPEC;
-	}
-	if (strchr(f, 'p')) {
-		flags |= RZ_REGEX_PEND;
-	}
-	if (strchr(f, 'd')) {
-		flags |= RZ_REGEX_DUMP;
-	}
-	return flags;
-}
-
-RZ_API void rz_regex_fini(RzRegex *preg) {
-	struct re_guts *g;
-	if (!preg) {
-		return;
-	}
-	if (preg->re_magic != MAGIC1) { /* oops */
-		return; /* nice to complain, but hard */
-	}
-
-	g = preg->re_g;
-	if (!g || g->magic != MAGIC2) { /* oops again */
-		return;
-	}
-	preg->re_magic = 0; /* mark it invalid */
-	g->magic = 0; /* mark it invalid */
-
-	free(g->strip);
-	free(g->sets);
-	free(g->setbits);
-	free(g->must);
-	free(g);
-}
-
-RZ_API void rz_regex_free(RzRegex *preg) {
-	rz_regex_fini(preg);
-	free(preg);
-}
-
-/*
- - regcomp - interface for parser and compilation
- - 0 success, otherwise RZ_REGEX_something
- */
-RZ_API int rz_regex_comp(RzRegex *preg, const char *pattern, int cflags) {
-	struct parse pa;
-	struct re_guts *g;
-	struct parse *p = &pa;
-	int i;
-	size_t len;
-#ifdef REDEBUG
-#define GOODFLAGS(f) (f)
-#else
-#define GOODFLAGS(f) ((f) & ~RZ_REGEX_DUMP)
-#endif
-	cflags = GOODFLAGS(cflags);
-	if (!preg || ((cflags & RZ_REGEX_EXTENDED) && (cflags & RZ_REGEX_NOSPEC))) {
-		return RZ_REGEX_INVARG;
-	}
-	if (cflags & RZ_REGEX_PEND) {
-		if (preg->re_endp < pattern) {
-			return RZ_REGEX_INVARG;
-		}
-		len = preg->re_endp - pattern;
-	} else {
-		len = strlen((char *)pattern);
-	}
-	/* do the mallocs early so failure handling is easy */
-	g = calloc(1, sizeof(struct re_guts) + (NC - 1));
-	if (!g) {
-		return RZ_REGEX_ESPACE;
-	}
-	/*
-	 * Limit the pattern space to avoid a 32-bit overflow on buffer
-	 * extension.  Also avoid any signed overflow in case of conversion
-	 * so make the real limit based on a 31-bit overflow.
-	 *
-	 * Likely not applicable on 64-bit systems but handle the case
-	 * generically (who are we to stop people from using ~715MB+
-	 * patterns?).
-	 */
-	size_t maxlen = ((size_t)-1 >> 1) / sizeof(sop) * 2 / 3;
-	if (len >= maxlen) {
-		free(g);
-		return RZ_REGEX_ESPACE;
-	}
-	preg->re_flags = cflags;
-	p->ssize = len / (size_t)2 * (size_t)3 + (size_t)1; /* ugh */
-	if (p->ssize < len) {
-		free(g);
-		return RZ_REGEX_ESPACE;
-	}
-
-	p->strip = (sop *)calloc(p->ssize, sizeof(sop));
-	if (!p->strip) {
-		free(g);
-		return RZ_REGEX_ESPACE;
-	}
-	p->slen = 0;
-	if (!p->strip) {
-		free(g);
-		return RZ_REGEX_ESPACE;
-	}
-
-	/* set things up */
-	p->g = g;
-	p->next = (char *)pattern; /* convenience; we do not modify it */
-	p->end = p->next + len;
-	p->error = 0;
-	p->ncsalloc = 0;
-	for (i = 0; i < NPAREN; i++) {
-		p->pbegin[i] = 0;
-		p->pend[i] = 0;
-	}
-	g->csetsize = NC;
-	g->sets = NULL;
-	g->setbits = NULL;
-	g->ncsets = 0;
-	g->cflags = cflags;
-	g->iflags = 0;
-	g->nbol = 0;
-	g->neol = 0;
-	g->must = NULL;
-	g->mlen = 0;
-	g->nsub = 0;
-	g->ncategories = 1; /* category 0 is "everything else" */
-	g->categories = &g->catspace[-(CHAR_MIN)];
-	(void)memset((char *)g->catspace, 0, NC * sizeof(cat_t));
-	g->backrefs = 0;
-
-	/* do it */
-	EMIT(OEND, 0);
-	g->firststate = THERE();
-	if (cflags & RZ_REGEX_EXTENDED) {
-		p_ere(p, OUT);
-	} else if (cflags & RZ_REGEX_NOSPEC) {
-		p_str(p);
-	} else {
-		p_bre(p, OUT, OUT);
-	}
-	EMIT(OEND, 0);
-	g->laststate = THERE();
-
-	/* tidy up loose ends and fill things in */
-	categorize(p, g);
-	stripsnug(p, g);
-	findmust(p, g);
-	g->nplus = pluscount(p, g);
-	g->magic = MAGIC2;
-	preg->re_nsub = g->nsub;
-	preg->re_g = g;
-	preg->re_magic = MAGIC1;
-#ifndef REDEBUG
-	/* not debugging, so can't rely on the asssert() in regexec() */
-	if (g->iflags & BAD) {
-		SETERROR(RZ_REGEX_ASSERT);
-	}
-#endif
-	if (p->error) {
-		rz_regex_fini(preg);
-	}
-	return p->error;
-}
-
-/*
- - p_ere - ERE parser top level, concatenation and alternation
- */
-static void p_ere(struct parse *p, int stop) { /* character this ERE should end at */
-	bool isFirst = true;
-	sopno prevback = 0;
-	sopno prevfwd = 0;
-	sopno conc = 0;
-	char c;
-
-	for (;;) {
-		/* do a bunch of concatenated expressions */
-		conc = HERE();
-		while (MORE() && (c = PEEK()) != '|' && c != stop) {
-			p_ere_exp(p);
-		}
-		REQUIRE(HERE() != conc, RZ_REGEX_EMPTY); /* require nonempty */
-
-		if (!EAT('|')) {
-			break; /* NOTE BREAK OUT */
-		}
-		if (isFirst) {
-			INSERT(OCH_, conc); /* offset is wrong */
-			prevfwd = conc;
-			prevback = conc;
-			isFirst = false;
-		}
-		ASTERN(OOR1, prevback);
-		prevback = THERE();
-		AHEAD(prevfwd); /* fix previous offset */
-		prevfwd = HERE();
-		EMIT(OOR2, 0); /* offset is very wrong */
-	}
-
-	if (!isFirst) { /* tail-end fixups */
-		AHEAD(prevfwd);
-		ASTERN(O_CH, prevback);
-	}
-	// asert(!MORE() || SEE(stop));
-}
-
-/*
- - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
- */
-static void p_ere_exp(struct parse *p) {
-	char c;
-	sopno pos;
-	int count;
-	int count2;
-	sopno subno;
-	int wascaret = 0;
-
-	if (!MORE()) { /* caller should have ensured this */
-		return;
-	}
-	c = GETNEXT();
-
-	pos = HERE();
-	switch (c) {
-	case '(':
-		REQUIRE(MORE(), RZ_REGEX_EPAREN);
-		p->g->nsub++;
-		subno = p->g->nsub;
-		if (subno < NPAREN) {
-			p->pbegin[subno] = HERE();
-		}
-		EMIT(OLPAREN, subno);
-		if (!SEE(')')) {
-			p_ere(p, ')');
-		}
-		if (subno < NPAREN) {
-			p->pend[subno] = HERE();
-			if (!p->pend[subno]) {
-				break;
-			}
-		}
-		EMIT(ORPAREN, subno);
-		MUSTEAT(')', RZ_REGEX_EPAREN);
-		break;
-	case '^':
-		EMIT(OBOL, 0);
-		p->g->iflags |= USEBOL;
-		p->g->nbol++;
-		wascaret = 1;
-		break;
-	case '$':
-		EMIT(OEOL, 0);
-		p->g->iflags |= USEEOL;
-		p->g->neol++;
-		break;
-	case '|':
-		SETERROR(RZ_REGEX_EMPTY);
-		break;
-	case '*':
-	case '+':
-	case '?':
-		SETERROR(RZ_REGEX_BADRPT);
-		break;
-	case '.':
-		if (p->g->cflags & RZ_REGEX_NEWLINE) {
-			nonnewline(p);
-		} else {
-			EMIT(OANY, 0);
-		}
-		break;
-	case '[':
-		p_bracket(p);
-		break;
-	case '\\':
-		REQUIRE(MORE(), RZ_REGEX_EESCAPE);
-		c = GETNEXT();
-		if (!isalpha(c)) {
-			ordinary(p, c);
-		} else {
-			special(p, c);
-		}
-		break;
-	case '{': /* okay as ordinary except if digit follows */
-		REQUIRE(!MORE() || !isdigit((ut8)PEEK()), RZ_REGEX_BADRPT);
-		/* FALLTHROUGH */
-	default:
-		ordinary(p, c);
-		break;
-	}
-
-	if (!MORE()) {
-		return;
-	}
-	c = PEEK();
-	/* we call { a repetition if followed by a digit */
-	if (!(c == '*' || c == '+' || c == '?' ||
-		    (c == '{' && MORE2() && isdigit((ut8)PEEK2())))) {
-		return; /* no repetition, we're done */
-	}
-	NEXT();
-
-	REQUIRE(!wascaret, RZ_REGEX_BADRPT);
-	switch (c) {
-	case '*': /* implemented as +? */
-		/* this case does not require the (y|) trick, noKLUDGE */
-		INSERT(OPLUS_, pos);
-		ASTERN(O_PLUS, pos);
-		INSERT(OQUEST_, pos);
-		ASTERN(O_QUEST, pos);
-		break;
-	case '+':
-		INSERT(OPLUS_, pos);
-		ASTERN(O_PLUS, pos);
-		break;
-	case '?':
-		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
-		INSERT(OCH_, pos); /* offset slightly wrong */
-		ASTERN(OOR1, pos); /* this one's right */
-		AHEAD(pos); /* fix the OCH_ */
-		EMIT(OOR2, 0); /* offset very wrong... */
-		AHEAD(THERE()); /* ...so fix it */
-		ASTERN(O_CH, THERETHERE());
-		break;
-	case '{':
-		count = p_count(p);
-		if (EAT(',')) {
-			if (isdigit((ut8)PEEK())) {
-				count2 = p_count(p);
-				REQUIRE(count <= count2, RZ_REGEX_BADBR);
-			} else { /* single number with comma */
-				count2 = INTFINITY;
-			}
-		} else { /* just a single number */
-			count2 = count;
-		}
-		repeat(p, pos, count, count2);
-		if (!EAT('}')) { /* error heuristics */
-			while (MORE() && PEEK() != '}') {
-				NEXT();
-			}
-			REQUIRE(MORE(), RZ_REGEX_EBRACE);
-			SETERROR(RZ_REGEX_BADBR);
-		}
-		break;
-	}
-
-	if (!MORE()) {
-		return;
-	}
-	c = PEEK();
-	if (!(c == '*' || c == '+' || c == '?' ||
-		    (c == '{' && MORE2() && isdigit((ut8)PEEK2())))) {
-		return;
-	}
-	SETERROR(RZ_REGEX_BADRPT);
-}
-
-/*
- - p_str - string (no metacharacters) "parser"
- */
-static void p_str(struct parse *p) {
-	REQUIRE(MORE(), RZ_REGEX_EMPTY);
-	while (MORE()) {
-		ordinary(p, GETNEXT());
-	}
-}
-
-/*
- - p_bre - BRE parser top level, anchoring and concatenation
- * Giving end1 as OUT essentially eliminates the end1/end2 check.
- *
- * This implementation is a bit of a kludge, in that a trailing $ is first
- * taken as an ordinary character and then revised to be an anchor.  The
- * only undesirable side effect is that '$' gets included as a character
- * category in such cases.  This is fairly harmless; not worth fixing.
- * The amount of lookahead needed to avoid this kludge is excessive.
- */
-static void p_bre(struct parse *p,
-	int end1, /* first terminating character */
-	int end2) /* second terminating character */
-{
-	sopno start = HERE();
-	int first = 1; /* first subexpression? */
-	int wasdollar = 0;
-
-	if (EAT('^')) {
-		EMIT(OBOL, 0);
-		p->g->iflags |= USEBOL;
-		p->g->nbol++;
-	}
-	while (MORE() && !SEETWO(end1, end2)) {
-		wasdollar = p_simp_re(p, first);
-		first = 0;
-	}
-	if (wasdollar) { /* oops, that was a trailing anchor */
-		DROP(1);
-		EMIT(OEOL, 0);
-		p->g->iflags |= USEEOL;
-		p->g->neol++;
-	}
-
-	REQUIRE(HERE() != start, RZ_REGEX_EMPTY); /* require nonempty */
-}
-
-/*
- - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
- */
-static int /* was the simple RE an unbackslashed $? */
-p_simp_re(struct parse *p,
-	int starordinary) /* is a leading * an ordinary character? */
-{
-	int c;
-	int count;
-	int count2;
-	sopno pos;
-	int i;
-	sopno subno;
-#define BACKSL (1 << CHAR_BIT)
-
-	pos = HERE(); /* repetion op, if any, covers from here */
-
-	if (!MORE()) { /* caller should have ensured this */
-		return 0;
-	}
-	c = GETNEXT();
-	if (c == '\\') {
-		REQUIRE(MORE(), RZ_REGEX_EESCAPE);
-		c = BACKSL | GETNEXT();
-	}
-	switch (c) {
-	case '.':
-		if (p->g->cflags & RZ_REGEX_NEWLINE) {
-			nonnewline(p);
-		} else {
-			EMIT(OANY, 0);
-		}
-		break;
-	case '[':
-		p_bracket(p);
-		break;
-	case BACKSL | '{':
-		SETERROR(RZ_REGEX_BADRPT);
-		break;
-	case BACKSL | '(':
-		p->g->nsub++;
-		subno = p->g->nsub;
-		if (subno < NPAREN) {
-			p->pbegin[subno] = HERE();
-		}
-		EMIT(OLPAREN, subno);
-		/* the MORE here is an error heuristic */
-		if (MORE() && !SEETWO('\\', ')')) {
-			p_bre(p, '\\', ')');
-		}
-		if (subno < NPAREN) {
-			p->pend[subno] = HERE();
-			if (!p->pend[subno]) {
-				break;
-			}
-		}
-		EMIT(ORPAREN, subno);
-		REQUIRE(EATTWO('\\', ')'), RZ_REGEX_EPAREN);
-		break;
-	case BACKSL | ')': /* should not get here -- must be user */
-	case BACKSL | '}':
-		SETERROR(RZ_REGEX_EPAREN);
-		break;
-	case BACKSL | '1':
-	case BACKSL | '2':
-	case BACKSL | '3':
-	case BACKSL | '4':
-	case BACKSL | '5':
-	case BACKSL | '6':
-	case BACKSL | '7':
-	case BACKSL | '8':
-	case BACKSL | '9':
-		i = (c & ~BACKSL) - '0';
-		if (p->pend[i] != 0) {
-			if (i <= p->g->nsub) {
-				EMIT(OBACK_, i);
-				if (p->pbegin[i] != 0 && OP(p->strip[p->pbegin[i]]) == OLPAREN &&
-					OP(p->strip[p->pend[i]]) == ORPAREN) {
-					(void)dupl(p, p->pbegin[i] + 1, p->pend[i]);
-					EMIT(O_BACK, i);
-				}
-			}
-		} else {
-			SETERROR(RZ_REGEX_ESUBREG);
-		}
-		p->g->backrefs = 1;
-		break;
-	case '*':
-		REQUIRE(starordinary, RZ_REGEX_BADRPT);
-		/* FALLTHROUGH */
-	default:
-		ordinary(p, (char)c);
-		break;
-	}
-
-	if (EAT('*')) { /* implemented as +? */
-		/* this case does not require the (y|) trick, noKLUDGE */
-		INSERT(OPLUS_, pos);
-		ASTERN(O_PLUS, pos);
-		INSERT(OQUEST_, pos);
-		ASTERN(O_QUEST, pos);
-	} else if (EATTWO('\\', '{')) {
-		count = p_count(p);
-		if (EAT(',')) {
-			if (MORE() && isdigit((ut8)PEEK())) {
-				count2 = p_count(p);
-				REQUIRE(count <= count2, RZ_REGEX_BADBR);
-			} else { /* single number with comma */
-				count2 = INTFINITY;
-			}
-		} else { /* just a single number */
-			count2 = count;
-		}
-		repeat(p, pos, count, count2);
-		if (!EATTWO('\\', '}')) { /* error heuristics */
-			while (MORE() && !SEETWO('\\', '}')) {
-				NEXT();
-			}
-			REQUIRE(MORE(), RZ_REGEX_EBRACE);
-			SETERROR(RZ_REGEX_BADBR);
-		}
-	} else if (c == '$') { /* $ (but not \$) ends it */
-		return (1);
-	}
-
-	return (0);
-}
-
-/*
- - p_count - parse a repetition count
- */
-static int /* the value */
-p_count(struct parse *p) {
-	int count = 0;
-	int ndigits = 0;
-
-	while (MORE() && isdigit((ut8)PEEK()) && count <= DUPMAX) {
-		count = count * 10 + (GETNEXT() - '0');
-		ndigits++;
-	}
-
-	REQUIRE(ndigits > 0 && count <= DUPMAX, RZ_REGEX_BADBR);
-	return (count);
-}
-
-/*
- - p_bracket - parse a bracketed character list
- *
- * Note a significant property of this code:  if the allocset() did SETERROR,
- * no set operations are done.
- */
-static void p_bracket(struct parse *p) {
-	cset *cs;
-	int invert = 0;
-
-	/* Dept of Truly Sickening Special-Case Kludges */
-	if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
-		EMIT(OBOW, 0);
-		NEXTn(6);
-		return;
-	}
-	if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
-		EMIT(OEOW, 0);
-		NEXTn(6);
-		return;
-	}
-
-	if (!(cs = allocset(p))) {
-		/* allocset did set error status in p */
-		return;
-	}
-
-	if (EAT('^')) {
-		invert++; /* make note to invert set at end */
-	}
-	if (EAT(']')) {
-		CHadd(cs, ']');
-	} else if (EAT('-')) {
-		CHadd(cs, '-');
-	}
-	while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) {
-		p_b_term(p, cs);
-	}
-	if (EAT('-')) {
-		CHadd(cs, '-');
-	}
-	MUSTEAT(']', RZ_REGEX_EBRACK);
-
-	if (p->error != 0) { /* don't mess things up further */
-		freeset(p, cs);
-		return;
-	}
-
-	if (p->g->cflags & RZ_REGEX_ICASE) {
-		int i;
-		int ci;
-
-		for (i = p->g->csetsize - 1; i >= 0; i--) {
-			if (CHIN(cs, i) && isalpha(i)) {
-				ci = othercase(i);
-				if (ci != i) {
-					CHadd(cs, ci);
-				}
-			}
-		}
-		if (cs->multis != NULL) {
-			mccase(p, cs);
-		}
-	}
-	if (invert) {
-		int i;
-
-		for (i = p->g->csetsize - 1; i >= 0; i--) {
-			if (CHIN(cs, i)) {
-				CHsub(cs, i);
-			} else {
-				CHadd(cs, i);
-			}
-		}
-		if (p->g->cflags & RZ_REGEX_NEWLINE) {
-			CHsub(cs, '\n');
-		}
-		if (cs->multis != NULL) {
-			mcinvert(p, cs);
-		}
-	}
-
-	if (cs->multis) { /* xxx */
-		return;
-	}
-
-	if (nch(p, cs) == 1) { /* optimize singleton sets */
-		ordinary(p, firstch(p, cs));
-		freeset(p, cs);
-	} else {
-		EMIT(OANYOF, freezeset(p, cs));
-	}
-}
-
-/*
- - p_b_term - parse one term of a bracketed character list
- */
-static void p_b_term(struct parse *p, cset *cs) {
-	char c;
-	char start = 0, finish;
-	int i;
-
-	/* classify what we've got */
-	switch ((MORE()) ? PEEK() : '\0') {
-	case '[':
-		c = (MORE2()) ? PEEK2() : '\0';
-		break;
-	case '-':
-		SETERROR(RZ_REGEX_ERANGE);
-		return; /* NOTE RETURN */
-		break;
-	default:
-		c = '\0';
-		break;
-	}
-
-	switch (c) {
-	case ':': /* character class */
-		NEXT2();
-		REQUIRE(MORE(), RZ_REGEX_EBRACK);
-		c = PEEK();
-		REQUIRE(c != '-' && c != ']', RZ_REGEX_ECTYPE);
-		p_b_cclass(p, cs);
-		REQUIRE(MORE(), RZ_REGEX_EBRACK);
-		REQUIRE(EATTWO(':', ']'), RZ_REGEX_ECTYPE);
-		break;
-	case '=': /* equivalence class */
-		NEXT2();
-		REQUIRE(MORE(), RZ_REGEX_EBRACK);
-		c = PEEK();
-		REQUIRE(c != '-' && c != ']', RZ_REGEX_ECOLLATE);
-		p_b_eclass(p, cs);
-		REQUIRE(MORE(), RZ_REGEX_EBRACK);
-		REQUIRE(EATTWO('=', ']'), RZ_REGEX_ECOLLATE);
-		break;
-	default: /* symbol, ordinary character, or range */
-		/* xxx revision needed for multichar stuff */
-		start = p_b_symbol(p);
-		if (SEE('-') && MORE2() && PEEK2() != ']') {
-			/* range */
-			NEXT();
-			if (EAT('-')) {
-				finish = '-';
-			} else {
-				finish = p_b_symbol(p);
-			}
-		} else {
-			finish = start;
-		}
-		/* xxx what about signed chars here... */
-		REQUIRE(start <= finish, RZ_REGEX_ERANGE);
-		for (i = start; i <= finish; i++) {
-			CHadd(cs, i);
-		}
-		break;
-	}
-}
-
-/*
- - p_b_cclass - parse a character-class name and deal with it
- */
-static void p_b_cclass(struct parse *p, cset *cs) {
-	char *sp = p->next;
-	struct cclass *cp;
-	size_t len;
-	char *u;
-	char c;
-
-	while (MORE() && isalpha((unsigned char)PEEK())) {
-		NEXT();
-	}
-	len = p->next - sp;
-	for (cp = cclasses; cp->name != NULL; cp++) {
-		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') {
-			break;
-		}
-	}
-	if (!cp->name) {
-		/* oops, didn't find it */
-		SETERROR(RZ_REGEX_ECTYPE);
-		return;
-	}
-
-	u = cp->chars;
-	while ((c = *u++) != '\0') {
-		CHadd(cs, c);
-	}
-	for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) {
-		MCadd(p, cs, u);
-	}
-}
-
-/*
- - p_b_eclass - parse an equivalence-class name and deal with it
- *
- * This implementation is incomplete. xxx
- */
-static void p_b_eclass(struct parse *p, cset *cs) {
-	char c;
-
-	c = p_b_coll_elem(p, '=');
-	CHadd(cs, c);
-}
-
-/*
- - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
- */
-static char /* value of symbol */
-p_b_symbol(struct parse *p) {
-	char value;
-
-	REQUIRE(MORE(), RZ_REGEX_EBRACK);
-	if (!EATTWO('[', '.')) {
-		return (GETNEXT());
-	}
-
-	/* collating symbol */
-	value = p_b_coll_elem(p, '.');
-	REQUIRE(EATTWO('.', ']'), RZ_REGEX_ECOLLATE);
-	return (value);
-}
-
-/*
- - p_b_coll_elem - parse a collating-element name and look it up
- */
-static char /* value of collating element */
-p_b_coll_elem(struct parse *p,
-	int endc) /* name ended by endc,']' */
-{
-	char *sp = p->next;
-	struct cname *cp;
-	int len;
-
-	while (MORE() && !SEETWO(endc, ']')) {
-		NEXT();
-	}
-	if (!MORE()) {
-		SETERROR(RZ_REGEX_EBRACK);
-		return (0);
-	}
-	len = p->next - sp;
-	for (cp = cnames; cp->name != NULL; cp++) {
-		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') {
-			return (cp->code); /* known name */
-		}
-	}
-	if (len == 1) {
-		return (*sp); /* single character */
-	}
-	SETERROR(RZ_REGEX_ECOLLATE); /* neither */
-	return (0);
-}
-
-/*
- - othercase - return the case counterpart of an alphabetic
- */
-static char /* if no counterpart, return ch */
-othercase(int ch) {
-	ch = (ut8)ch;
-	if (isalpha(ch)) {
-		if (isupper(ch)) {
-			return ((ut8)tolower(ch));
-		} else if (islower(ch)) {
-			return ((ut8)toupper(ch));
-		} else { /* peculiar, but could happen */
-			return (ch);
-		}
-	}
-	return ch;
-}
-
-/*
- - bothcases - emit a dualcase version of a two-case character
- *
- * Boy, is this implementation ever a kludge...
- */
-static void bothcases(struct parse *p, int ch) {
-	char *oldnext = p->next;
-	char *oldend = p->end;
-	char bracket[3];
-
-	ch = (ut8)ch;
-	if (othercase(ch) != ch) { /* p_bracket() would recurse */
-		p->next = bracket;
-		p->end = bracket + 2;
-		bracket[0] = ch;
-		bracket[1] = ']';
-		bracket[2] = '\0';
-		p_bracket(p);
-		if (p->next == bracket + 2) {
-			p->next = oldnext;
-			p->end = oldend;
-		}
-	}
-}
-
-/*
- - ordinary - emit an ordinary character
- */
-static void
-ordinary(struct parse *p, int ch) {
-	cat_t *cap = p->g->categories;
-
-	if ((p->g->cflags & RZ_REGEX_ICASE) && isalpha((ut8)ch) && othercase(ch) != ch) {
-		bothcases(p, ch);
-	} else {
-		EMIT(OCHAR, (ut8)ch);
-		if (cap[ch] == 0) {
-			cap[ch] = p->g->ncategories++;
-		}
-	}
-}
-
-static void
-special(struct parse *p, int ch) {
-	char *oldnext = p->next;
-	char *oldend = p->end;
-	char bracket[16] = { 0 };
-	char digits[3] = { 0 };
-	char c;
-	int num = 0;
-	switch (ch) {
-	case 'x':
-		digits[0] = GETNEXT();
-		digits[1] = GETNEXT();
-		c = (char)strtol(digits, NULL, 16);
-		ordinary(p, c);
-		return;
-	case 'n':
-		ordinary(p, '\n');
-		return;
-	case 't':
-		ordinary(p, '\t');
-		return;
-	case 'r':
-		ordinary(p, '\r');
-		return;
-	case 's':
-	case 'S':
-		num = 6;
-		const char *chars = "^\t\r\n ]";
-		if (ch == 's') {
-			num--;
-			chars++;
-		}
-		memcpy(bracket, chars, num);
-		break;
-	case 'd':
-		num = 4;
-		memcpy(bracket, "0-9]", num);
-		break;
-	case 'w':
-		num = 4;
-		memcpy(bracket, "a-z]", num);
-		break;
-	default:
-		SETERROR(RZ_REGEX_INVARG);
-		return;
-	}
-
-	p->next = bracket;
-	p->end = bracket + num;
-
-	p_bracket(p);
-
-	if (p->next == bracket + num) {
-		p->next = oldnext;
-		p->end = oldend;
-	}
-}
-
-/*
- - nonnewline - emit RZ_REGEX_NEWLINE version of OANY
- *
- * Boy, is this implementation ever a kludge...
- */
-static void
-nonnewline(struct parse *p) {
-	char *oldnext = p->next;
-	char *oldend = p->end;
-	char bracket[4];
-
-	p->next = bracket;
-	p->end = bracket + 3;
-	bracket[0] = '^';
-	bracket[1] = '\n';
-	bracket[2] = ']';
-	bracket[3] = '\0';
-	p_bracket(p);
-	if (p->next == bracket + 3) {
-		p->next = oldnext;
-		p->end = oldend;
-	}
-}
-
-/*
- - repeat - generate code for a bounded repetition, recursively if needed
- */
-static void
-repeat(struct parse *p,
-	sopno start, /* operand from here to end of strip */
-	int from, /* repeated from this number */
-	int to) /* to this number of times (maybe INTFINITY) */
-{
-	sopno finish = HERE();
-#define N         2
-#define INF       3
-#define REP(f, t) ((f)*8 + (t))
-#define MAP(n)    (((n) <= 1) ? (n) : ((n) == INTFINITY) ? INF \
-							 : N)
-	sopno copy;
-
-	if (p->error != 0) { /* head off possible runaway recursion */
-		return;
-	}
-
-	if (from > to) {
-		return;
-	}
-
-	switch (REP(MAP(from), MAP(to))) {
-	case REP(0, 0): /* must be user doing this */
-		DROP(finish - start); /* drop the operand */
-		break;
-	case REP(0, 1): /* as x{1,1}? */
-	case REP(0, N): /* as x{1,n}? */
-	case REP(0, INF): /* as x{1,}? */
-		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
-		INSERT(OCH_, start); /* offset is wrong... */
-		repeat(p, start + 1, 1, to);
-		ASTERN(OOR1, start);
-		AHEAD(start); /* ... fix it */
-		EMIT(OOR2, 0);
-		AHEAD(THERE());
-		ASTERN(O_CH, THERETHERE());
-		break;
-	case REP(1, 1): /* trivial case */
-		/* done */
-		break;
-	case REP(1, N): /* as x?x{1,n-1} */
-		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
-		INSERT(OCH_, start);
-		ASTERN(OOR1, start);
-		AHEAD(start);
-		EMIT(OOR2, 0); /* offset very wrong... */
-		AHEAD(THERE()); /* ...so fix it */
-		ASTERN(O_CH, THERETHERE());
-		copy = dupl(p, start + 1, finish + 1);
-		if (copy == finish + 4) {
-			repeat(p, copy, 1, to - 1);
-		}
-		break;
-	case REP(1, INF): /* as x+ */
-		INSERT(OPLUS_, start);
-		ASTERN(O_PLUS, start);
-		break;
-	case REP(N, N): /* as xx{m-1,n-1} */
-		copy = dupl(p, start, finish);
-		repeat(p, copy, from - 1, to - 1);
-		break;
-	case REP(N, INF): /* as xx{n-1,INF} */
-		copy = dupl(p, start, finish);
-		repeat(p, copy, from - 1, to);
-		break;
-	default: /* "can't happen" */
-		SETERROR(RZ_REGEX_ASSERT); /* just in case */
-		break;
-	}
-}
-
-/*
- - seterr - set an error condition
- */
-static int /* useless but makes type checking happy */
-seterr(struct parse *p, int e) {
-	if (p->error == 0) { /* keep earliest error condition */
-		p->error = e;
-	}
-	p->next = nuls; /* try to bring things to a halt */
-	p->end = nuls;
-	return (0); /* make the return value well-defined */
-}
-
-/*
- - allocset - allocate a set of characters for []
- */
-static cset *allocset(struct parse *p) {
-	int no = p->g->ncsets++;
-	size_t nc;
-	size_t nbytes;
-	cset *cs;
-	size_t css = (size_t)p->g->csetsize;
-	int i;
-
-	if (no >= p->ncsalloc) { /* need another column of space */
-		void *ptr;
-
-		p->ncsalloc += CHAR_BIT;
-		nc = p->ncsalloc;
-		if (nc % CHAR_BIT) {
-			goto nomem;
-		}
-		nbytes = nc / CHAR_BIT * css;
-
-		ptr = (cset *)realloc((char *)p->g->sets, nc * sizeof(cset));
-		if (!ptr) {
-			goto nomem;
-		}
-		p->g->sets = ptr;
-
-		ptr = (ut8 *)realloc((char *)p->g->setbits, nbytes);
-		if (!ptr) {
-			goto nomem;
-		}
-		p->g->setbits = ptr;
-
-		for (i = 0; i < no; i++) {
-			p->g->sets[i].ptr = p->g->setbits + css * (i / CHAR_BIT);
-		}
-
-		(void)memset((char *)p->g->setbits + (nbytes - css), 0, css);
-	}
-	/* XXX should not happen */
-	if (!p->g->sets || !p->g->setbits) {
-		goto nomem;
-	}
-
-	cs = &p->g->sets[no];
-	cs->ptr = p->g->setbits + css * ((no) / CHAR_BIT);
-	cs->mask = 1 << ((no) % CHAR_BIT);
-	cs->hash = 0;
-	cs->smultis = 0;
-	cs->multis = NULL;
-
-	return (cs);
-nomem:
-	RZ_FREE(p->g->sets);
-	RZ_FREE(p->g->setbits);
-
-	SETERROR(RZ_REGEX_ESPACE);
-	/* caller's responsibility not to do set ops */
-	return (NULL);
-}
-
-/*
- - freeset - free a now-unused set
- */
-static void freeset(struct parse *p, cset *cs) {
-	int i;
-	cset *top = &p->g->sets[p->g->ncsets];
-	size_t css = (size_t)p->g->csetsize;
-
-	for (i = 0; i < css; i++) {
-		CHsub(cs, i);
-	}
-	if (cs == top - 1) { /* recover only the easy case */
-		p->g->ncsets--;
-	}
-}
-
-/*
- - freezeset - final processing on a set of characters
- *
- * The main task here is merging identical sets.  This is usually a waste
- * of time (although the hash code minimizes the overhead), but can win
- * big if RZ_REGEX_ICASE is being used.  RZ_REGEX_ICASE, by the way, is why the hash
- * is done using addition rather than xor -- all ASCII [aA] sets xor to
- * the same value!
- */
-static int /* set number */
-freezeset(struct parse *p, cset *cs) {
-	ut8 h = cs->hash;
-	int i;
-	cset *top = &p->g->sets[p->g->ncsets];
-	cset *cs2;
-	size_t css = (size_t)p->g->csetsize;
-
-	/* look for an earlier one which is the same */
-	for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) {
-		if (cs2->hash == h && cs2 != cs) {
-			/* maybe */
-			for (i = 0; i < css; i++) {
-				if (!!CHIN(cs2, i) != !!CHIN(cs, i)) {
-					break; /* no */
-				}
-			}
-			if (i == css) {
-				break; /* yes */
-			}
-		}
-	}
-
-	if (cs2 < top) { /* found one */
-		freeset(p, cs);
-		cs = cs2;
-	}
-
-	return ((int)(cs - p->g->sets));
-}
-
-/*
- - firstch - return first character in a set (which must have at least one)
- */
-static int /* character; there is no "none" value */
-firstch(struct parse *p, cset *cs) {
-	int i;
-	size_t css = (size_t)p->g->csetsize;
-
-	for (i = 0; i < css; i++) {
-		if (CHIN(cs, i)) {
-			return ((char)i);
-		}
-	}
-	return (0); /* arbitrary */
-}
-
-/*
- - nch - number of characters in a set
- */
-static int nch(struct parse *p, cset *cs) {
-	int i;
-	size_t css = (size_t)p->g->csetsize;
-	int n = 0;
-
-	for (i = 0; i < css; i++) {
-		if (CHIN(cs, i)) {
-			n++;
-		}
-	}
-	return (n);
-}
-
-/*
- - mcadd - add a collating element to a cset
- */
-static void mcadd(struct parse *p, cset *cs, char *cp) {
-	size_t oldend = cs->smultis;
-	void *np;
-
-	cs->smultis += strlen(cp) + 1;
-	np = realloc(cs->multis, cs->smultis);
-	if (!np) {
-		if (cs->multis) {
-			free(cs->multis);
-		}
-		cs->multis = NULL;
-		SETERROR(RZ_REGEX_ESPACE);
-		return;
-	}
-	cs->multis = np;
-
-	STRLCPY(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1);
-}
-
-/*
- - mcinvert - invert the list of collating elements in a cset
- *
- * This would have to know the set of possibilities.  Implementation
- * is deferred.
- */
-/* ARGSUSED */
-static void mcinvert(struct parse *p, cset *cs) {
-	// asert(!cs->multis);	/* xxx */
-	return;
-}
-
-/*
- - mccase - add case counterparts of the list of collating elements in a cset
- *
- * This would have to know the set of possibilities.  Implementation
- * is deferred.
- */
-/* ARGSUSED */
-static void mccase(struct parse *p, cset *cs) {
-	// asert(!cs->multis);	/* xxx */
-	return;
-}
-
-/*
- - isinsets - is this character in any sets?
- */
-static int /* predicate */
-isinsets(struct re_guts *g, int c) {
-	ut8 *col;
-	int i;
-	int ncols = (g->ncsets + (CHAR_BIT - 1)) / CHAR_BIT;
-	unsigned uc = (ut8)c;
-
-	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) {
-		if (col[uc] != 0) {
-			return (1);
-		}
-	}
-	return (0);
-}
-
-/*
- - samesets - are these two characters in exactly the same sets?
- */
-static int /* predicate */
-samesets(struct re_guts *g, int c1, int c2) {
-	ut8 *col;
-	int i;
-	int ncols = (g->ncsets + (CHAR_BIT - 1)) / CHAR_BIT;
-	unsigned uc1 = (ut8)c1;
-	unsigned uc2 = (ut8)c2;
-
-	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) {
-		if (col[uc1] != col[uc2]) {
-			return (0);
-		}
-	}
-	return (1);
-}
-
-/*
- - categorize - sort out character categories
- */
-static void
-categorize(struct parse *p, struct re_guts *g) {
-	cat_t *cats = g ? g->categories : NULL;
-	int c;
-	int c2;
-	cat_t cat;
-
-	/* avoid making error situations worse */
-	if (!p || p->error != 0 || !cats) {
-		return;
-	}
-
-	for (c = CHAR_MIN; c <= CHAR_MAX; c++) {
-		if (*(cats + c) && isinsets(g, c)) {
-			cat = g->ncategories++;
-			cats[c] = cat;
-			for (c2 = c + 1; c2 <= CHAR_MAX; c2++) {
-				if (cats[c2] == 0 && samesets(g, c, c2)) {
-					cats[c2] = cat;
-				}
-			}
-		}
-	}
-}
-
-/*
- - dupl - emit a duplicate of a bunch of sops
- */
-static sopno /* start of duplicate */
-dupl(struct parse *p,
-	sopno start, /* from here */
-	sopno finish) /* to this less one */
-{
-	sopno ret = HERE();
-	sopno len = finish - start;
-
-	if (finish >= start) {
-		if (len == 0) {
-			return (ret);
-		}
-		enlarge(p, p->ssize + len); /* this many unexpected additions */
-		if (p->ssize >= p->slen + len) {
-			(void)memcpy((char *)(p->strip + p->slen),
-				(char *)(p->strip + start), (size_t)len * sizeof(sop));
-			p->slen += len;
-			return (ret);
-		}
-	}
-	return ret;
-}
-
-/*
- - doemit - emit a strip operator
- *
- * It might seem better to implement this as a macro with a function as
- * hard-case backup, but it's just too big and messy unless there are
- * some changes to the data structures.  Maybe later.
- */
-static void
-doemit(struct parse *p, sop op, size_t opnd) {
-	/* avoid making error situations worse */
-	if (p->error != 0) {
-		return;
-	}
-
-	/* deal with oversize operands ("can't happen", more or less) */
-	if (opnd < 1 << OPSHIFT) {
-
-		/* deal with undersized strip */
-		if (p->slen >= p->ssize) {
-			enlarge(p, (p->ssize + 1) / 2 * 3); /* +50% */
-		}
-		if (p->slen < p->ssize) {
-			/* finally, it's all reduced to the easy case */
-			p->strip[p->slen++] = SOP(op, opnd);
-		}
-	}
-}
-
-/*
- - doinsert - insert a sop into the strip
- */
-static void
-doinsert(struct parse *p, sop op, size_t opnd, sopno pos) {
-	sopno sn;
-	sop s;
-	int i;
-
-	/* avoid making error situations worse */
-	if (p->error != 0) {
-		return;
-	}
-
-	sn = HERE();
-	EMIT(op, opnd); /* do checks, ensure space */
-	if (HERE() != sn + 1) {
-		return;
-	}
-	s = p->strip[sn];
-
-	/* adjust paren pointers */
-	if (pos > 0) {
-		for (i = 1; i < NPAREN; i++) {
-			if (p->pbegin[i] >= pos) {
-				p->pbegin[i]++;
-			}
-			if (p->pend[i] >= pos) {
-				p->pend[i]++;
-			}
-		}
-	}
-
-	memmove((char *)&p->strip[pos + 1], (char *)&p->strip[pos],
-		(HERE() - pos - 1) * sizeof(sop));
-	p->strip[pos] = s;
-}
-
-/*
- - dofwd - complete a forward reference
- */
-static void
-dofwd(struct parse *p, sopno pos, sop value) {
-	/* avoid making error situations worse */
-	if (p->error != 0) {
-		return;
-	}
-
-	if (value < 1 << OPSHIFT) {
-		p->strip[pos] = OP(p->strip[pos]) | value;
-	}
-}
-
-/*
- - enlarge - enlarge the strip
- */
-static void
-enlarge(struct parse *p, sopno size) {
-	sop *sp;
-
-	if (p->ssize >= size) {
-		return;
-	}
-
-	sp = (sop *)realloc(p->strip, size * sizeof(sop));
-	if (!sp) {
-		SETERROR(RZ_REGEX_ESPACE);
-		return;
-	}
-	p->strip = sp;
-	p->ssize = size;
-}
-
-/*
- - stripsnug - compact the strip
- */
-static void
-stripsnug(struct parse *p, struct re_guts *g) {
-	g->nstates = p->slen;
-	g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
-	if (!g->strip) {
-		SETERROR(RZ_REGEX_ESPACE);
-		g->strip = p->strip;
-	}
-}
-
-/*
- - findmust - fill in must and mlen with longest mandatory literal string
- *
- * This algorithm could do fancy things like analyzing the operands of |
- * for common subsequences.  Someday.  This code is simple and finds most
- * of the interesting cases.
- *
- * Note that must and mlen got initialized during setup.
- */
-static void
-findmust(struct parse *p, struct re_guts *g) {
-	sop *scan;
-	sop *start = NULL; /* start initialized in the default case, after that */
-	sop *newstart = NULL; /* newstart was initialized in the OCHAR case */
-	sopno newlen;
-	sop s;
-	char *cp;
-	sopno i;
-
-	/* avoid making error situations worse */
-	if (p->error != 0) {
-		return;
-	}
-
-	/* find the longest OCHAR sequence in strip */
-	newlen = 0;
-	start = scan = g->strip + 1;
-	do {
-		s = *scan++;
-		switch (OP(s)) {
-		case OCHAR: /* sequence member */
-			if (newlen == 0) { /* new sequence */
-				newstart = scan - 1;
-			}
-			newlen++;
-			break;
-		case OPLUS_: /* things that don't break one */
-		case OLPAREN:
-		case ORPAREN:
-			break;
-		case OQUEST_: /* things that must be skipped */
-		case OCH_:
-			scan--;
-			do {
-				scan += OPND(s);
-				s = *scan;
-				/* asert() interferes w debug printouts */
-				if (OP(s) != O_QUEST && OP(s) != O_CH &&
-					OP(s) != OOR2) {
-					g->iflags |= BAD;
-					return;
-				}
-			} while (OP(s) != O_QUEST && OP(s) != O_CH);
-			/* fallthrough */
-		default: /* things that break a sequence */
-			if (newlen > g->mlen) { /* ends one */
-				start = newstart;
-				g->mlen = newlen;
-			}
-			newlen = 0;
-			break;
-		}
-	} while (OP(s) != OEND);
-
-	if (g->mlen == 0) { /* there isn't one */
-		return;
-	}
-
-	/* turn it into a character string */
-	g->must = malloc((size_t)g->mlen + 1);
-	if (!g->must) { /* argh; just forget it */
-		g->mlen = 0;
-		return;
-	}
-	cp = g->must;
-	scan = start;
-	for (i = g->mlen; i > 0; i--) {
-		while (OP(s = *scan++) != OCHAR) {
-			continue;
-		}
-		if (cp < g->must + g->mlen) {
-			*cp++ = (char)OPND(s);
-		}
-	}
-	if (cp == g->must + g->mlen) {
-		*cp++ = '\0'; /* just on general principles */
-	}
-}
-
-/*
- - pluscount - count + nesting
- */
-static sopno /* nesting depth */
-pluscount(struct parse *p, struct re_guts *g) {
-	sop *scan;
-	sop s;
-	sopno plusnest = 0;
-	sopno maxnest = 0;
-
-	if (p->error != 0) {
-		return (0); /* there may not be an OEND */
-	}
-
-	scan = g->strip + 1;
-	do {
-		s = *scan++;
-		switch (OP(s)) {
-		case OPLUS_:
-			plusnest++;
-			break;
-		case O_PLUS:
-			if (plusnest > maxnest) {
-				maxnest = plusnest;
-			}
-			plusnest--;
-			break;
-		}
-	} while (OP(s) != OEND);
-	if (plusnest != 0) {
-		g->iflags |= BAD;
-	}
-	return (maxnest);
-}
diff --git a/librz/util/regex/regerror.c b/librz/util/regex/regerror.c
deleted file mode 100644
index 069115b7e6b..00000000000
--- a/librz/util/regex/regerror.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/*	$OpenBSD: regerror.c,v 1.13 2005/08/05 13:03:00 espie Exp $ */
-/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
- * Copyright (c) 1992, 1993, 1994
- *	The Regents of the University of California.  All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)regerror.c	8.4 (Berkeley) 3/20/94
- */
-
-#include <sys/types.h>
-#include <stdio.h>
-#include <string.h>
-#include <ctype.h>
-#include <limits.h>
-#include <stdlib.h>
-#include "rz_regex.h"
-
-#include "utils.h"
-
-static char *regatoi(const RzRegex *, char *, int);
-
-static struct rerr {
-	int code;
-	char *name;
-	char *explain;
-} rerrs[] = {
-	{ RZ_REGEX_NOMATCH, "RZ_REGEX_NOMATCH", "regexec() failed to match" },
-	{ RZ_REGEX_BADPAT, "RZ_REGEX_BADPAT", "invalid regular expression" },
-	{ RZ_REGEX_ECOLLATE, "RZ_REGEX_ECOLLATE", "invalid collating element" },
-	{ RZ_REGEX_ECTYPE, "RZ_REGEX_ECTYPE", "invalid character class" },
-	{ RZ_REGEX_EESCAPE, "RZ_REGEX_EESCAPE", "trailing backslash (\\)" },
-	{ RZ_REGEX_ESUBREG, "RZ_REGEX_ESUBREG", "invalid backreference number" },
-	{ RZ_REGEX_EBRACK, "RZ_REGEX_EBRACK", "brackets ([ ]) not balanced" },
-	{ RZ_REGEX_EPAREN, "RZ_REGEX_EPAREN", "parentheses not balanced" },
-	{ RZ_REGEX_EBRACE, "RZ_REGEX_EBRACE", "braces not balanced" },
-	{ RZ_REGEX_BADBR, "RZ_REGEX_BADBR", "invalid repetition count(s)" },
-	{ RZ_REGEX_ERANGE, "RZ_REGEX_ERANGE", "invalid character range" },
-	{ RZ_REGEX_ESPACE, "RZ_REGEX_ESPACE", "out of memory" },
-	{ RZ_REGEX_BADRPT, "RZ_REGEX_BADRPT", "repetition-operator operand invalid" },
-	{ RZ_REGEX_EMPTY, "RZ_REGEX_EMPTY", "empty (sub)expression" },
-	{ RZ_REGEX_ASSERT, "RZ_REGEX_ASSERT", "\"can't happen\" -- you found a bug" },
-	{ RZ_REGEX_INVARG, "RZ_REGEX_INVARG", "invalid argument to regex routine" },
-	{ 0, "", "*** unknown regexp error code ***" }
-};
-
-/*
- - regerror - the interface to error numbers
- = extern size_t regerror(int, const regex_t *, char *, size_t);
- */
-/* ARGSUSED */
-RZ_API size_t rz_regex_error(int errcode, const RzRegex *preg, char *errbuf, size_t errbuf_size) {
-	struct rerr *r;
-	size_t len;
-	int target = errcode & ~RZ_REGEX_ITOA;
-	char *s;
-	char convbuf[50];
-
-	if (errcode == RZ_REGEX_ATOI) {
-		s = regatoi(preg, convbuf, sizeof convbuf);
-	} else {
-		for (r = rerrs; r->code != 0; r++) {
-			if (r->code == target) {
-				break;
-			}
-		}
-
-		if (errcode & RZ_REGEX_ITOA) {
-			if (r->code != 0) {
-				STRLCPY(convbuf, r->name, sizeof(convbuf) - 1);
-			} else {
-				snprintf(convbuf, sizeof convbuf, "RZ_REGEX_0x%x", target);
-			}
-			s = convbuf;
-		} else {
-			s = r->explain;
-		}
-	}
-
-	len = strlen(s) + 1;
-	if (errbuf_size > 0) {
-		STRLCPY(errbuf, s, errbuf_size - 1);
-	}
-
-	return len;
-}
-
-/*
- - regatoi - internal routine to implement RZ_REGEX_ATOI
- */
-static char *
-regatoi(const RzRegex *preg, char *localbuf, int localbufsize) {
-	struct rerr *r;
-
-	for (r = rerrs; r->code != 0; r++) {
-		if (strcmp(r->name, preg->re_endp) == 0) {
-			break;
-		}
-	}
-	if (r->code == 0) {
-		return ("0");
-	}
-
-	(void)snprintf(localbuf, localbufsize, "%d", r->code);
-	return (localbuf);
-}
diff --git a/librz/util/regex/regex.3 b/librz/util/regex/regex.3
deleted file mode 100644
index c851b8d8e9c..00000000000
--- a/librz/util/regex/regex.3
+++ /dev/null
@@ -1,667 +0,0 @@
-.\"	$OpenBSD: regex.3,v 1.21 2007/05/31 19:19:30 jmc Exp $
-.\"
-.\" Copyright (c) 1997, Phillip F Knaack. All rights reserved.
-.\"
-.\" Copyright (c) 1992, 1993, 1994 Henry Spencer.
-.\" Copyright (c) 1992, 1993, 1994
-.\"	The Regents of the University of California.  All rights reserved.
-.\"
-.\" This code is derived from software contributed to Berkeley by
-.\" Henry Spencer.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\"    notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\"    notice, this list of conditions and the following disclaimer in the
-.\"    documentation and/or other materials provided with the distribution.
-.\" 3. Neither the name of the University nor the names of its contributors
-.\"    may be used to endorse or promote products derived from this software
-.\"    without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\"	@(#)regex.3	8.4 (Berkeley) 3/20/94
-.\"
-.Dd $Mdocdate: May 31 2007 $
-.Dt REGEX 3
-.Os
-.Sh NAME
-.Nm regcomp ,
-.Nm regexec ,
-.Nm regerror ,
-.Nm regfree
-.Nd regular expression routines
-.Sh SYNOPSIS
-.Fd #include <sys/types.h>
-.Fd #include <regex.h>
-.Ft int
-.Fn regcomp "regex_t *preg" "const char *pattern" "int cflags"
-.Pp
-.Ft int
-.Fn regexec "const regex_t *preg" "const char *string" "size_t nmatch" \
-            "regmatch_t pmatch[]" "int eflags"
-.Pp
-.Ft size_t
-.Fn regerror "int errcode" "const regex_t *preg" "char *errbuf" \
-             "size_t errbuf_size"
-.Pp
-.Ft void
-.Fn regfree "regex_t *preg"
-.Sh DESCRIPTION
-These routines implement
-.St -p1003.2
-regular expressions
-.Pq Dq REs ;
-see
-.Xr re_format 7 .
-.Fn regcomp
-compiles an RE written as a string into an internal form,
-.Fn regexec
-matches that internal form against a string and reports results,
-.Fn regerror
-transforms error codes from either into human-readable messages, and
-.Fn regfree
-frees any dynamically allocated storage used by the internal form
-of an RE.
-.Pp
-The header
-.Aq Pa regex.h
-declares two structure types,
-.Li regex_t
-and
-.Li regmatch_t ,
-the former for compiled internal forms and the latter for match reporting.
-It also declares the four functions,
-a type
-.Li regoff_t ,
-and a number of constants with names starting with
-.Dv REG_ .
-.Pp
-.Fn regcomp
-compiles the regular expression contained in the
-.Fa pattern
-string,
-subject to the flags in
-.Fa cflags ,
-and places the results in the
-.Li regex_t
-structure pointed to by
-.Fa preg .
-.Fa cflags
-is the bitwise
-.Tn OR
-of zero or more of the following flags:
-.Bl -tag -width XREG_EXTENDEDX
-.It Dv REG_EXTENDED
-Compile modern
-.Pq Dq extended
-REs,
-rather than the obsolete
-.Pq Dq basic
-REs that are the default.
-.It Dv REG_BASIC
-This is a synonym for 0,
-provided as a counterpart to
-.Dv REG_EXTENDED
-to improve readability.
-.It Dv REG_NOSPEC
-Compile with recognition of all special characters turned off.
-All characters are thus considered ordinary,
-so the RE is a literal string.
-This is an extension,
-compatible with but not specified by
-.St -p1003.2 ,
-and should be used with
-caution in software intended to be portable to other systems.
-.Dv REG_EXTENDED
-and
-.Dv REG_NOSPEC
-may not be used in the same call to
-.Fn regcomp .
-.It Dv REG_ICASE
-Compile for matching that ignores upper/lower case distinctions.
-See
-.Xr re_format 7 .
-.It Dv REG_NOSUB
-Compile for matching that need only report success or failure,
-not what was matched.
-.It Dv REG_NEWLINE
-Compile for newline-sensitive matching.
-By default, newline is a completely ordinary character with no special
-meaning in either REs or strings.
-With this flag,
-.Ql \&[^
-bracket expressions and
-.Ql \&.
-never match newline,
-a
-.Ql ^
-anchor matches the null string after any newline in the string
-in addition to its normal function,
-and the
-.Ql $
-anchor matches the null string before any newline in the
-string in addition to its normal function.
-.It Dv REG_PEND
-The regular expression ends,
-not at the first NUL,
-but just before the character pointed to by the
-.Fa re_endp
-member of the structure pointed to by
-.Fa preg .
-The
-.Fa re_endp
-member is of type
-.Fa const\ char\ * .
-This flag permits inclusion of NULs in the RE;
-they are considered ordinary characters.
-This is an extension,
-compatible with but not specified by
-.St -p1003.2 ,
-and should be used with
-caution in software intended to be portable to other systems.
-.El
-.Pp
-When successful,
-.Fn regcomp
-returns 0 and fills in the structure pointed to by
-.Fa preg .
-One member of that structure
-(other than
-.Fa re_endp )
-is publicized:
-.Fa re_nsub ,
-of type
-.Fa size_t ,
-contains the number of parenthesized subexpressions within the RE
-(except that the value of this member is undefined if the
-.Dv REG_NOSUB
-flag was used).
-If
-.Fn regcomp
-fails, it returns a non-zero error code;
-see DIAGNOSTICS.
-.Pp
-.Fn regexec
-matches the compiled RE pointed to by
-.Fa preg
-against the
-.Fa string ,
-subject to the flags in
-.Fa eflags ,
-and reports results using
-.Fa nmatch ,
-.Fa pmatch ,
-and the returned value.
-The RE must have been compiled by a previous invocation of
-.Fn regcomp .
-The compiled form is not altered during execution of
-.Fn regexec ,
-so a single compiled RE can be used simultaneously by multiple threads.
-.Pp
-By default,
-the NUL-terminated string pointed to by
-.Fa string
-is considered to be the text of an entire line, minus any terminating
-newline.
-The
-.Fa eflags
-argument is the bitwise
-.Tn OR
-of zero or more of the following flags:
-.Bl -tag -width XREG_STARTENDX
-.It Dv REG_NOTBOL
-The first character of
-the string
-is not the beginning of a line, so the
-.Ql ^
-anchor should not match before it.
-This does not affect the behavior of newlines under
-.Dv REG_NEWLINE .
-.It Dv REG_NOTEOL
-The NUL terminating
-the string
-does not end a line, so the
-.Ql $
-anchor should not match before it.
-This does not affect the behavior of newlines under
-.Dv REG_NEWLINE .
-.It Dv REG_STARTEND
-The string is considered to start at
-\fIstring\fR\ + \fIpmatch\fR[0].\fIrm_so\fR
-and to have a terminating NUL located at
-\fIstring\fR\ + \fIpmatch\fR[0].\fIrm_eo\fR
-(there need not actually be a NUL at that location),
-regardless of the value of
-.Fa nmatch .
-See below for the definition of
-.Fa pmatch
-and
-.Fa nmatch .
-This is an extension,
-compatible with but not specified by
-.St -p1003.2 ,
-and should be used with
-caution in software intended to be portable to other systems.
-Note that a non-zero \fIrm_so\fR does not imply
-.Dv REG_NOTBOL ;
-.Dv REG_STARTEND
-affects only the location of the string,
-not how it is matched.
-.El
-.Pp
-See
-.Xr re_format 7
-for a discussion of what is matched in situations where an RE or a
-portion thereof could match any of several substrings of
-.Fa string .
-.Pp
-Normally,
-.Fn regexec
-returns 0 for success and the non-zero code
-.Dv REG_NOMATCH
-for failure.
-Other non-zero error codes may be returned in exceptional situations;
-see DIAGNOSTICS.
-.Pp
-If
-.Dv REG_NOSUB
-was specified in the compilation of the RE,
-or if
-.Fa nmatch
-is 0,
-.Fn regexec
-ignores the
-.Fa pmatch
-argument (but see below for the case where
-.Dv REG_STARTEND
-is specified).
-Otherwise,
-.Fa pmatch
-points to an array of
-.Fa nmatch
-structures of type
-.Li regmatch_t .
-Such a structure has at least the members
-.Fa rm_so
-and
-.Fa rm_eo ,
-both of type
-.Fa regoff_t
-(a signed arithmetic type at least as large as an
-.Li off_t
-and a
-.Li ssize_t ) ,
-containing respectively the offset of the first character of a substring
-and the offset of the first character after the end of the substring.
-Offsets are measured from the beginning of the
-.Fa string
-argument given to
-.Fn regexec .
-An empty substring is denoted by equal offsets,
-both indicating the character following the empty substring.
-.Pp
-The 0th member of the
-.Fa pmatch
-array is filled in to indicate what substring of
-.Fa string
-was matched by the entire RE.
-Remaining members report what substring was matched by parenthesized
-subexpressions within the RE;
-member
-.Va i
-reports subexpression
-.Va i ,
-with subexpressions counted (starting at 1) by the order of their opening
-parentheses in the RE, left to right.
-Unused entries in the array\(emcorresponding either to subexpressions that
-did not participate in the match at all, or to subexpressions that do not
-exist in the RE (that is, \fIi\fR\ > \fIpreg\fR\->\fIre_nsub\fR)\(emhave both
-.Fa rm_so
-and
-.Fa rm_eo
-set to \-1.
-If a subexpression participated in the match several times,
-the reported substring is the last one it matched.
-(Note, as an example in particular, that when the RE
-.Dq (b*)+
-matches
-.Dq bbb ,
-the parenthesized subexpression matches each of the three
-.Sq b Ns s
-and then
-an infinite number of empty strings following the last
-.Sq b ,
-so the reported substring is one of the empties.)
-.Pp
-If
-.Dv REG_STARTEND
-is specified,
-.Fa pmatch
-must point to at least one
-.Li regmatch_t
-(even if
-.Fa nmatch
-is 0 or
-.Dv REG_NOSUB
-was specified),
-to hold the input offsets for
-.Dv REG_STARTEND .
-Use for output is still entirely controlled by
-.Fa nmatch ;
-if
-.Fa nmatch
-is 0 or
-.Dv REG_NOSUB
-was specified,
-the value of
-.Fa pmatch[0]
-will not be changed by a successful
-.Fn regexec .
-.Pp
-.Fn regerror
-maps a non-zero
-.Va errcode
-from either
-.Fn regcomp
-or
-.Fn regexec
-to a human-readable, printable message.
-If
-.Fa preg
-is non-NULL,
-the error code should have arisen from use of
-the
-.Li regex_t
-pointed to by
-.Fa preg ,
-and if the error code came from
-.Fn regcomp ,
-it should have been the result from the most recent
-.Fn regcomp
-using that
-.Li regex_t .
-.Pf ( Fn regerror
-may be able to supply a more detailed message using information
-from the
-.Li regex_t . )
-.Fn regerror
-places the NUL-terminated message into the buffer pointed to by
-.Fa errbuf ,
-limiting the length (including the NUL) to at most
-.Fa errbuf_size
-bytes.
-If the whole message won't fit,
-as much of it as will fit before the terminating NUL is supplied.
-In any case,
-the returned value is the size of buffer needed to hold the whole
-message (including the terminating NUL).
-If
-.Fa errbuf_size
-is 0,
-.Fa errbuf
-is ignored but the return value is still correct.
-.Pp
-If the
-.Fa errcode
-given to
-.Fn regerror
-is first
-.Tn OR Ns 'ed
-with
-.Dv REG_ITOA ,
-the
-.Dq message
-that results is the printable name of the error code,
-e.g.,
-.Dq REG_NOMATCH ,
-rather than an explanation thereof.
-If
-.Fa errcode
-is
-.Dv REG_ATOI ,
-then
-.Fa preg
-shall be non-null and the
-.Fa re_endp
-member of the structure it points to
-must point to the printable name of an error code;
-in this case, the result in
-.Fa errbuf
-is the decimal digits of
-the numeric value of the error code
-(0 if the name is not recognized).
-.Dv REG_ITOA
-and
-.Dv REG_ATOI
-are intended primarily as debugging facilities;
-they are extensions,
-compatible with but not specified by
-.St -p1003.2
-and should be used with
-caution in software intended to be portable to other systems.
-Be warned also that they are considered experimental and changes are possible.
-.Pp
-.Fn regfree
-frees any dynamically allocated storage associated with the compiled RE
-pointed to by
-.Fa preg .
-The remaining
-.Li regex_t
-is no longer a valid compiled RE
-and the effect of supplying it to
-.Fn regexec
-or
-.Fn regerror
-is undefined.
-.Pp
-None of these functions references global variables except for tables
-of constants;
-all are safe for use from multiple threads if the arguments are safe.
-.Sh IMPLEMENTATION CHOICES
-There are a number of decisions that
-.St -p1003.2
-leaves up to the implementor,
-either by explicitly saying
-.Dq undefined
-or by virtue of them being
-forbidden by the RE grammar.
-This implementation treats them as follows.
-.Pp
-See
-.Xr re_format 7
-for a discussion of the definition of case-independent matching.
-.Pp
-There is no particular limit on the length of REs,
-except insofar as memory is limited.
-Memory usage is approximately linear in RE size, and largely insensitive
-to RE complexity, except for bounded repetitions.
-See
-.Sx BUGS
-for one short RE using them
-that will run almost any system out of memory.
-.Pp
-A backslashed character other than one specifically given a magic meaning
-by
-.St -p1003.2
-(such magic meanings occur only in obsolete REs)
-is taken as an ordinary character.
-.Pp
-Any unmatched
-.Ql \&[
-is a
-.Dv REG_EBRACK
-error.
-.Pp
-Equivalence classes cannot begin or end bracket-expression ranges.
-The endpoint of one range cannot begin another.
-.Pp
-RE_DUP_MAX, the limit on repetition counts in bounded repetitions, is 255.
-.Pp
-A repetition operator (?, *, +, or bounds) cannot follow another
-repetition operator.
-A repetition operator cannot begin an expression or subexpression
-or follow
-.Ql ^
-or
-.Ql | .
-.Pp
-A
-.Ql |
-cannot appear first or last in a (sub)expression, or after another
-.Ql | ,
-i.e., an operand of
-.Ql |
-cannot be an empty subexpression.
-An empty parenthesized subexpression,
-.Ql \&(\&) ,
-is legal and matches an
-empty (sub)string.
-An empty string is not a legal RE.
-.Pp
-A
-.Ql {
-followed by a digit is considered the beginning of bounds for a
-bounded repetition, which must then follow the syntax for bounds.
-A
-.Ql {
-.Em not
-followed by a digit is considered an ordinary character.
-.Pp
-.Ql ^
-and
-.Ql $
-beginning and ending subexpressions in obsolete
-.Pq Dq basic
-REs are anchors, not ordinary characters.
-.Sh DIAGNOSTICS
-Non-zero error codes from
-.Fn regcomp
-and
-.Fn regexec
-include the following:
-.Pp
-.Bl -tag -compact -width XREG_ECOLLATEX
-.It Er REG_NOMATCH
-regexec() failed to match
-.It Er REG_BADPAT
-invalid regular expression
-.It Er REG_ECOLLATE
-invalid collating element
-.It Er REG_ECTYPE
-invalid character class
-.It Er REG_EESCAPE
-\e applied to unescapable character
-.It Er REG_ESUBREG
-invalid backreference number
-.It Er REG_EBRACK
-brackets [ ] not balanced
-.It Er REG_EPAREN
-parentheses ( ) not balanced
-.It Er REG_EBRACE
-braces { } not balanced
-.It Er REG_BADBR
-invalid repetition count(s) in { }
-.It Er REG_ERANGE
-invalid character range in [ ]
-.It Er REG_ESPACE
-ran out of memory
-.It Er REG_BADRPT
-?, *, or + operand invalid
-.It Er REG_EMPTY
-empty (sub)expression
-.It Er REG_ASSERT
-.Dq can't happen
-\(emyou found a bug
-.It Er REG_INVARG
-invalid argument, e.g., negative-length string
-.El
-.Sh SEE ALSO
-.Xr grep 1 ,
-.Xr re_format 7
-.Pp
-.St -p1003.2 ,
-sections 2.8 (Regular Expression Notation)
-and
-B.5 (C Binding for Regular Expression Matching).
-.Sh HISTORY
-Originally written by Henry Spencer.
-Altered for inclusion in the
-.Bx 4.4
-distribution.
-.Sh BUGS
-This is an alpha release with known defects.
-Please report problems.
-.Pp
-There is one known functionality bug.
-The implementation of internationalization is incomplete:
-the locale is always assumed to be the default one of
-.St -p1003.2 ,
-and only the collating elements etc. of that locale are available.
-.Pp
-The back-reference code is subtle and doubts linger about its correctness
-in complex cases.
-.Pp
-.Fn regexec
-performance is poor.
-This will improve with later releases.
-.Fa nmatch
-exceeding 0 is expensive;
-.Fa nmatch
-exceeding 1 is worse.
-.Fn regexec
-is largely insensitive to RE complexity
-.Em except
-that back references are massively expensive.
-RE length does matter; in particular, there is a strong speed bonus
-for keeping RE length under about 30 characters,
-with most special characters counting roughly double.
-.Pp
-.Fn regcomp
-implements bounded repetitions by macro expansion,
-which is costly in time and space if counts are large
-or bounded repetitions are nested.
-A RE like, say,
-.Dq ((((a{1,100}){1,100}){1,100}){1,100}){1,100}
-will (eventually) run almost any existing machine out of swap space.
-.Pp
-There are suspected problems with response to obscure error conditions.
-Notably,
-certain kinds of internal overflow,
-produced only by truly enormous REs or by multiply nested bounded repetitions,
-are probably not handled well.
-.Pp
-Due to a mistake in
-.St -p1003.2 ,
-things like
-.Ql a)b
-are legal REs because
-.Ql \&)
-is
-a special character only in the presence of a previous unmatched
-.Ql \&( .
-This can't be fixed until the spec is fixed.
-.Pp
-The standard's definition of back references is vague.
-For example, does
-.Dq a\e(\e(b\e)*\e2\e)*d
-match
-.Dq abbbd ?
-Until the standard is clarified,
-behavior in such cases should not be relied on.
-.Pp
-The implementation of word-boundary matching is a bit of a kludge,
-and bugs may lurk in combinations of word-boundary matching and anchoring.
diff --git a/librz/util/regex/regex2.h b/librz/util/regex/regex2.h
deleted file mode 100644
index cb84e2e3306..00000000000
--- a/librz/util/regex/regex2.h
+++ /dev/null
@@ -1,158 +0,0 @@
-/*	$OpenBSD: regex2.h,v 1.7 2004/11/30 17:04:23 otto Exp $	*/
-
-/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
- * Copyright (c) 1992, 1993, 1994
- *	The Regents of the University of California.  All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)regex2.h	8.4 (Berkeley) 3/20/94
- */
-
-/*
- * internals of regex_t
- */
-#define MAGIC1 ((('r' ^ 0200) << 8) | 'e')
-
-/*
- * The internal representation is a *strip*, a sequence of
- * operators ending with an endmarker.  (Some terminology etc. is a
- * historical relic of earlier versions which used multiple strips.)
- * Certain oddities in the representation are there to permit running
- * the machinery backwards; in particular, any deviation from sequential
- * flow must be marked at both its source and its destination.  Some
- * fine points:
- *
- * - OPLUS_ and O_PLUS are *inside* the loop they create.
- * - OQUEST_ and O_QUEST are *outside* the bypass they create.
- * - OCH_ and O_CH are *outside* the multi-way branch they create, while
- *   OOR1 and OOR2 are respectively the end and the beginning of one of
- *   the branches.  Note that there is an implicit OOR2 following OCH_
- *   and an implicit OOR1 preceding O_CH.
- *
- * In state representations, an operator's bit is on to signify a state
- * immediately *preceding* "execution" of that operator.
- */
-typedef unsigned long sop; /* strip operator */
-typedef long sopno;
-#define OPRMASK       0xf8000000LU
-#define OPDMASK       0x07ffffffLU
-#define OPSHIFT       ((unsigned)27)
-#define OP(n)         ((n)&OPRMASK)
-#define OPND(n)       ((n)&OPDMASK)
-#define SOP(op, opnd) ((op) | (opnd))
-/* operators			   meaning	operand			*/
-/*						(back, fwd are offsets)	*/
-#define OEND    (1LU << OPSHIFT) /* endmarker	-			*/
-#define OCHAR   (2LU << OPSHIFT) /* character	unsigned char		*/
-#define OBOL    (3LU << OPSHIFT) /* left anchor	-			*/
-#define OEOL    (4LU << OPSHIFT) /* right anchor	-			*/
-#define OANY    (5LU << OPSHIFT) /* .		-			*/
-#define OANYOF  (6LU << OPSHIFT) /* [...]	set number		*/
-#define OBACK_  (7LU << OPSHIFT) /* begin \d	paren number		*/
-#define O_BACK  (8LU << OPSHIFT) /* end \d	paren number		*/
-#define OPLUS_  (9LU << OPSHIFT) /* + prefix	fwd to suffix		*/
-#define O_PLUS  (10LU << OPSHIFT) /* + suffix	back to prefix		*/
-#define OQUEST_ (11LU << OPSHIFT) /* ? prefix	fwd to suffix		*/
-#define O_QUEST (12LU << OPSHIFT) /* ? suffix	back to prefix		*/
-#define OLPAREN (13LU << OPSHIFT) /* (		fwd to )		*/
-#define ORPAREN (14LU << OPSHIFT) /* )		back to (		*/
-#define OCH_    (15LU << OPSHIFT) /* begin choice	fwd to OOR2		*/
-#define OOR1    (16LU << OPSHIFT) /* | pt. 1	back to OOR1 or OCH_	*/
-#define OOR2    (17LU << OPSHIFT) /* | pt. 2	fwd to OOR2 or O_CH	*/
-#define O_CH    (18LU << OPSHIFT) /* end choice	back to OOR1		*/
-#define OBOW    (19LU << OPSHIFT) /* begin word	-			*/
-#define OEOW    (20LU << OPSHIFT) /* end word	-			*/
-
-/*
- * Structure for [] character-set representation.  Character sets are
- * done as bit vectors, grouped 8 to a byte vector for compactness.
- * The individual set therefore has both a pointer to the byte vector
- * and a mask to pick out the relevant bit of each byte.  A hash code
- * simplifies testing whether two sets could be identical.
- *
- * This will get trickier for multicharacter collating elements.  As
- * preliminary hooks for dealing with such things, we also carry along
- * a string of multi-character elements, and decide the size of the
- * vectors at run time.
- */
-typedef struct {
-	ut8 *ptr; /* -> ut8 [csetsize] */
-	ut8 mask; /* bit within array */
-	ut8 hash; /* hash code */
-	size_t smultis;
-	char *multis; /* -> char[smulti]  ab\0cd\0ef\0\0 */
-} cset;
-/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */
-#define CHadd(cs, c)     ((cs)->ptr[(ut8)(c)] |= (cs)->mask, (cs)->hash += (c))
-#define CHsub(cs, c)     ((cs)->ptr[(ut8)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
-#define CHIN(cs, c)      ((cs)->ptr[(ut8)(c)] & (cs)->mask)
-#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */
-#define MCsub(p, cs, cp) mcsub(p, cs, cp)
-#define MCin(p, cs, cp)  mcin(p, cs, cp)
-
-/* stuff for character categories */
-typedef unsigned char cat_t;
-
-/*
- * main compiled-expression structure
- */
-struct re_guts {
-	int magic;
-#define MAGIC2 ((('R' ^ 0200) << 8) | 'E')
-	sop *strip; /* malloced area for strip */
-	int csetsize; /* number of bits in a cset vector */
-	int ncsets; /* number of csets in use */
-	cset *sets; /* -> cset [ncsets] */
-	ut8 *setbits; /* -> ut8[csetsize][ncsets/CHAR_BIT] */
-	int cflags; /* copy of regcomp() cflags argument */
-	sopno nstates; /* = number of sops */
-	sopno firststate; /* the initial OEND (normally 0) */
-	sopno laststate; /* the final OEND */
-	int iflags; /* internal flags */
-#define USEBOL 01 /* used ^ */
-#define USEEOL 02 /* used $ */
-#define BAD    04 /* something wrong */
-	int nbol; /* number of ^ used */
-	int neol; /* number of $ used */
-	int ncategories; /* how many character categories */
-	cat_t *categories; /* ->catspace[-CHAR_MIN] */
-	char *must; /* match must contain this string */
-	int mlen; /* length of must */
-	size_t nsub; /* copy of re_nsub */
-	int backrefs; /* does it use back references? */
-	sopno nplus; /* how deep does it nest +s? */
-	/* catspace must be last */
-	cat_t catspace[1]; /* actually [NC] */
-};
-
-/* misc utilities */
-#undef OUT
-#define OUT       (-CHAR_MIN + CHAR_MAX + 1) /* a non-character value */
-#define ISWORD(c) (isalnum((ut8)(c)) || (c) == '_')
diff --git a/librz/util/regex/regexec.c b/librz/util/regex/regexec.c
deleted file mode 100644
index 97ba732dc95..00000000000
--- a/librz/util/regex/regexec.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/*	$OpenBSD: regexec.c,v 1.11 2005/08/05 13:03:00 espie Exp $ */
-/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
- * Copyright (c) 1992, 1993, 1994
- *	The Regents of the University of California.  All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)regexec.c	8.3 (Berkeley) 3/20/94
- */
-
-/*
- * the outer shell of regexec()
- *
- * This file includes engine.c *twice*, after muchos fiddling with the
- * macros that code uses.  This lets the same code operate on two different
- * representations for state sets.
- */
-#include <sys/types.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-#include <ctype.h>
-#include <rz_regex.h>
-
-#include "utils.h"
-#include "regex2.h"
-
-/* macros for manipulating states, small version */
-#define states1          long long /* for later use in regexec() decision */
-#define states           states1
-#define CLEAR(v)         ((v) = 0)
-#define SET0(v, n)       ((v) &= ~((unsigned states)1 << (n)))
-#define SET1(v, n)       ((v) |= (unsigned states)1 << (n))
-#define ISSET(v, n)      (((v) & ((unsigned states)1 << (n))) != 0)
-#define ASSIGN(d, s)     ((d) = (s))
-#define EQ(a, b)         ((a) == (b))
-#define STATEVARS        states dummy /* dummy version */
-#define STATESETUP(m, n) /* nothing */
-#define STATETEARDOWN(m) /* nothing */
-#define SETUP(v)         ((v) = 0)
-#define onestate         states
-#define INIT(o, n)       ((o) = (unsigned states)1 << (n))
-#define INC(o)           ((o) <<= 1)
-#define ISSTATEIN(v, o)  (((v) & (o)) != 0)
-/* some abbreviations; note that some of these know variable names! */
-/* do "if I'm here, I can also be there" etc without branches */
-#define FWD(dst, src, n)  ((dst) |= ((unsigned states)(src) & (here)) << (n))
-#define BACK(dst, src, n) ((dst) |= ((unsigned states)(src) & (here)) >> (n))
-#define ISSETBACK(v, n)   (((v) & ((unsigned states)here >> (n))) != 0)
-/* function names */
-#define SNAMES /* engine.c looks after details */
-
-#include "engine.c"
-
-/* now undo things */
-#undef states
-#undef CLEAR
-#undef SET0
-#undef SET1
-#undef ISSET
-#undef ASSIGN
-#undef EQ
-#undef STATEVARS
-#undef STATESETUP
-#undef STATETEARDOWN
-#undef SETUP
-#undef onestate
-#undef INIT
-#undef INC
-#undef ISSTATEIN
-#undef FWD
-#undef BACK
-#undef ISSETBACK
-#undef SNAMES
-
-/* macros for manipulating states, large version */
-#define states       char *
-#define CLEAR(v)     memset(v, 0, m->g->nstates)
-#define SET0(v, n)   ((v)[n] = 0)
-#define SET1(v, n)   ((v)[n] = 1)
-#define ISSET(v, n)  ((v)[n])
-#define ASSIGN(d, s) memcpy(d, s, m->g->nstates)
-#define EQ(a, b)     (memcmp(a, b, m->g->nstates) == 0)
-#define STATEVARS \
-	states1 vn; \
-	char *space
-#define STATESETUP(m, nv) \
-	{ \
-		(m)->space = malloc((nv) * (m)->g->nstates); \
-		if (!(m)->space) \
-			return RZ_REGEX_ESPACE; \
-		(m)->vn = 0; \
-	}
-#define STATETEARDOWN(m) \
-	{ free((m)->space); }
-#define SETUP(v)        ((v) = &m->space[m->vn++ * m->g->nstates])
-#define onestate        states1
-#define INIT(o, n)      ((o) = (n))
-#define INC(o)          ((o)++)
-#define ISSTATEIN(v, o) ((v)[o])
-/* some abbreviations; note that some of these know variable names! */
-/* do "if I'm here, I can also be there" etc without branches */
-#define FWD(dst, src, n)  ((dst)[here + (n)] |= (src)[here])
-#define BACK(dst, src, n) ((dst)[here - (n)] |= (src)[here])
-#define ISSETBACK(v, n)   ((v)[here - (n)])
-/* function names */
-#define LNAMES /* flag */
-
-#include "engine.c"
-
-RZ_API bool rz_regex_check(const RzRegex *rr, const char *str) {
-	return rz_regex_exec(rr, str, 0, NULL, rr->re_flags);
-}
-/*
- - regexec - interface for matching
- *
- * We put this here so we can exploit knowledge of the state representation
- * when choosing which matcher to call.  Also, by this point the matchers
- * have been prototyped.
- */
-/* 0 success, RZ_REGEX_NOMATCH failure */
-RZ_API int rz_regex_exec(const RzRegex *preg, const char *string, size_t nmatch,
-	RzRegexMatch pmatch[], int eflags) {
-	struct re_guts *g;
-#ifdef REDEBUG
-#define GOODFLAGS(f) (f)
-#else
-#define GOODFLAGS(f) ((f) & (RZ_REGEX_NOTBOL | RZ_REGEX_NOTEOL | RZ_REGEX_STARTEND | RZ_REGEX_LARGE))
-#endif
-	if (!preg || !string) {
-		return RZ_REGEX_ASSERT;
-	}
-
-	g = preg->re_g;
-	if (preg->re_magic != MAGIC1 || g->magic != MAGIC2) {
-		return (RZ_REGEX_BADPAT);
-	}
-	if (g->iflags & BAD) { /* backstop for no-debug case */
-		return (RZ_REGEX_BADPAT);
-	}
-	eflags = GOODFLAGS(eflags);
-	if (g->nstates <= CHAR_BIT * sizeof(states1) && !(eflags & RZ_REGEX_LARGE)) {
-		return (smatcher(g, (char *)string, nmatch, pmatch, eflags));
-	} else {
-		return (lmatcher(g, (char *)string, nmatch, pmatch, eflags));
-	}
-}
diff --git a/librz/util/regex/test.c b/librz/util/regex/test.c
deleted file mode 100644
index 91fc75a9d44..00000000000
--- a/librz/util/regex/test.c
+++ /dev/null
@@ -1,55 +0,0 @@
-#include <stdio.h>
-#include <rz_regex.h>
-
-int _main(void) {
-	RzRegex rx;
-	int rc = rz_regex_comp(&rx, "^hi", RZ_REGEX_NOSUB);
-	if (rc) {
-		printf("error\n");
-
-	} else {
-		rc = rz_regex_exec(&rx, "patata", 0, 0, 0);
-		printf("out = %d\n", rc);
-
-		rc = rz_regex_exec(&rx, "hillow", 0, 0, 0);
-		printf("out = %d\n", rc);
-	}
-	rz_regex_free(&rx);
-	return 0;
-}
-
-static void test_or(void) {
-	RzRegex *rx = rz_regex_new("(eax|ebx)", "e");
-	printf("result (%s) = %d\n", "mov eax", rz_regex_match("(eax|ebx)", "e", "mov eax"));
-	printf("result (%s) = %d\n", "mov ebx", rz_regex_match("(eax|ebx)", "e", "mov ebx"));
-	printf("result (%s) = %d\n", "mov eax", rz_regex_match("(eax|ebx)", "e", "mov ecx"));
-	printf("result (%s) = %d\n", "mov ebx", rz_regex_match("(eax|ecx)", "e", "mov ebx"));
-	printf("result (%s) = %d\n", "mov eax", rz_regex_check(rx, "mov eax"));
-	printf("result (%s) = %d\n", "mov ebx", rz_regex_check(rx, "mov ebx"));
-	printf("result (%s) = %d\n", "mov eax", rz_regex_exec(rx, "mov eax", 0, 0, 1));
-	printf("result (%s) = %d\n", "mov ebx", rz_regex_exec(rx, "mov ebx", 0, 0, 1));
-	rz_regex_free(rx);
-}
-
-int main(int argc, char **argv) {
-	const char *needle = "^hi";
-	const char *haystack_1 = "patata";
-	const char *haystack_2 = "hillow";
-	if (argc > 3) {
-		needle = argv[1];
-		haystack_1 = argv[2];
-		haystack_2 = argv[3];
-	} else
-		printf("Using default values\n");
-	RzRegex *rx = rz_regex_new(needle, "");
-	if (rx) {
-		int res = rz_regex_exec(rx, haystack_1, 0, 0, 0);
-		printf("result (%s) = %d\n", haystack_1, res);
-		res = rz_regex_exec(rx, haystack_2, 0, 0, 0);
-		printf("result (%s) = %d\n", haystack_2, res);
-		rz_regex_free(rx);
-	} else
-		printf("oops, cannot compile regexp\n");
-	test_or();
-	return 0;
-}
diff --git a/librz/util/regex/utils.h b/librz/util/regex/utils.h
deleted file mode 100644
index 5bcda97a97e..00000000000
--- a/librz/util/regex/utils.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*	$OpenBSD: utils.h,v 1.4 2003/06/02 20:18:36 millert Exp $	*/
-
-/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
- * Copyright (c) 1992, 1993, 1994
- *	The Regents of the University of California.  All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)utils.h	8.3 (Berkeley) 3/20/94
- */
-
-/* utility definitions */
-#define DUPMAX 255
-//_POSIX2_RE_DUP_MAX	/* xxx is this right? */
-#define INTFINITY (DUPMAX + 1)
-#define NC        (CHAR_MAX - CHAR_MIN + 1)
-
-#define STRLCPY(x, y, z) \
-	{ \
-		strncpy((x), (y), (z)); \
-		(x)[(z) ? (z)-1 : 0] = 0; \
-	}
-
-/* switch off assertions (if not already off) if no REDEBUG */
-#ifndef REDEBUG
-#ifndef NDEBUG
-#define NDEBUG /* no assertions please */
-#endif
-#endif
-#include <assert.h>
-
-/* for old systems with bcopy() but no memmove() */
-#ifdef USEBCOPY
-#define memmove(d, s, c) bcopy(s, d, c)
-#endif
-#define ut8 unsigned char
diff --git a/librz/util/str.c b/librz/util/str.c
index 14d877b6042..f3efeb643a3 100644
--- a/librz/util/str.c
+++ b/librz/util/str.c
@@ -1,13 +1,14 @@
 // SPDX-FileCopyrightText: 2007-2020 pancake <pancake@nopcode.org>
 // SPDX-License-Identifier: LGPL-3.0-only
 
-#include <rz_regex.h>
+#include <rz_util/rz_regex.h>
 #include "rz_list.h"
 #include "rz_types.h"
 #include "rz_util.h"
 #include "rz_cons.h"
 #include "rz_bin.h"
 #include "rz_util/rz_assert.h"
+#include <rz_vector.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -1180,7 +1181,7 @@ RZ_API RZ_OWN char *rz_str_replace(RZ_OWN char *str, const char *key, const char
 				if (!newstr) {
 					eprintf("realloc fail\n");
 					RZ_FREE(str);
-					break;
+					return NULL;
 				}
 				str = newstr;
 			}
@@ -3409,31 +3410,35 @@ static RzList /*<char *>*/ *str_split_list_common(char *str, const char *c, int
 static RzList /*<char *>*/ *str_split_list_common_regex(RZ_BORROW char *str, RZ_BORROW RzRegex *r, int n, bool trim, bool dup) {
 	rz_return_val_if_fail(str && r, NULL);
 	RzList *lst = rz_list_newf(dup ? free : NULL);
-	RzRegexMatch m[1];
 	char *aux;
 	int i = 0;
 	int s = 0, e = 0;
 	int j = 0;
-	while (rz_regex_exec(r, str + j, 1, m, 0) == 0) {
+	void **it;
+	RzPVector *matches = rz_regex_match_all(r, str, RZ_REGEX_ZERO_TERMINATED, 0, RZ_REGEX_DEFAULT);
+	rz_pvector_foreach (matches, it) {
+		RzPVector *m = (RzPVector *)*it;
+		RzRegexMatch *group0 = rz_pvector_head(m);
 		if (n == i && n > 0) {
 			break;
 		}
-		s = m[0].rm_so; // Match start (inclusive) in string str + j
-		e = m[0].rm_eo; // Match end (exclusive) in string str + j
+		s = group0->start; // Match start (inclusive), offset from the beginning of str
+		e = group0->start + group0->len; // Match end (exclusive), offset from the beginning of str
 		if (dup) {
-			aux = rz_str_ndup(str + j, s);
+			aux = rz_str_ndup(str + j, s - j);
 		} else {
 			// Overwrite split chararcters.
-			memset(str + j + s, 0, e - s);
+			memset(str + s, 0, e - s);
 			aux = str + j;
 		}
 		if (trim) {
 			rz_str_trim(aux);
 		}
 		rz_list_append(lst, aux);
-		j += e;
+		j = e;
 		++i;
 	}
+	rz_pvector_free(matches);
 	if (*(str + j) == 0 || (n == i && n > 0) || rz_list_length(lst) == 0) {
 		// No token left.
 		return lst;
@@ -3483,7 +3488,7 @@ RZ_API RzList /*<char *>*/ *rz_str_split_list(char *str, const char *c, int n) {
  */
 RZ_API RZ_OWN RzList /*<char *>*/ *rz_str_split_list_regex(RZ_NONNULL char *str, RZ_NONNULL const char *r, int n) {
 	rz_return_val_if_fail(str && r, NULL);
-	RzRegex *regex = rz_regex_new(r, "e");
+	RzRegex *regex = rz_regex_new(r, RZ_REGEX_EXTENDED, 0);
 	RzList *res = str_split_list_common_regex(str, regex, n, false, false);
 	rz_regex_free(regex);
 	return res;
@@ -3545,7 +3550,7 @@ RZ_API RzList /*<char *>*/ *rz_str_split_duplist_n(const char *_str, const char
 RZ_API RZ_OWN RzList /*<char *>*/ *rz_str_split_duplist_n_regex(RZ_NONNULL const char *_str, RZ_NONNULL const char *r, int n, bool trim) {
 	rz_return_val_if_fail(_str && r, NULL);
 	char *str = strdup(_str);
-	RzRegex *regex = rz_regex_new(r, "e");
+	RzRegex *regex = rz_regex_new(r, RZ_REGEX_EXTENDED, 0);
 	RzList *res = str_split_list_common_regex(str, regex, n, trim, true);
 	free(str);
 	rz_regex_free(regex);
diff --git a/meson.build b/meson.build
index 5b68819aab2..a55a9e0b50e 100644
--- a/meson.build
+++ b/meson.build
@@ -195,6 +195,24 @@ else
   add_project_arguments(['-DUSE_SYS_CAPSTONE'], language: 'c')
 endif
 
+# Handle PCRE2
+cpu_jit_supported = [ 'aarch64', 'arm', 'mips', 'mips64', 'ppc', 'ppc64', 'riscv32', 'riscv64', 's390x', 'x86', 'x86_64' ]
+pcre2_jit_supported = target_machine.cpu_family() in cpu_jit_supported and cc.get_id() != 'tcc'
+if pcre2_jit_supported
+  add_project_arguments(['-DSUPPORTS_PCRE2_JIT'], language: 'c')
+endif
+
+pcre2_dep_opt = get_option('use_sys_pcre2')
+pcre2_dep = disabler()
+if pcre2_dep_opt.enabled() or pcre2_dep_opt.auto()
+  pcre2_dep = dependency('libpcre2-8', required: false, static: true)
+  if not pcre2_dep.found()
+    pcre2_dep = cc.find_library('pcre2', required: true, static: true)
+  endif
+else
+  pcre2_dep = dependency('pcre2', 'pcre2_dep', version: '>=10.42', required: true, static: true)
+endif
+
 # handle magic library
 sys_magic_opt = get_option('use_sys_magic')
 sys_magic = disabler()
@@ -339,6 +357,8 @@ foreach it : ccs
   it_userconf.set10('IS_PORTABLE', get_option('portable'))
   it_userconf.set10('HAVE_LIB_MAGIC', sys_magic.found())
   it_userconf.set10('USE_LIB_MAGIC', sys_magic.found())
+  it_userconf.set10('HAVE_LIB_PCRE2', pcre2_dep.found())
+  it_userconf.set10('USE_LIB_PCRE2', pcre2_dep.found())
   it_userconf.set10('HAVE_LIB_XXHASH', xxhash_dep.found())
   it_userconf.set10('USE_LIB_XXHASH', xxhash_dep.found())
   it_userconf.set10('DEBUGGER', has_debugger)
@@ -380,6 +400,7 @@ foreach it : ccs
   it_userconf.set10('HAVE_PTHREAD', have_pthread)
   it_userconf.set10('HAVE_LZMA', get_option('use_lzma'))
   it_userconf.set10('HAVE_ZLIB', get_option('use_zlib'))
+  it_userconf.set10('SUPPORTS_PCRE2_JIT', pcre2_jit_supported)
 
   if it_machine.system() == 'freebsd' or it_machine.system() == 'dragonfly'
     add_project_link_arguments('-Wl,--unresolved-symbols,ignore-in-object-files', language: 'c', native: it_native)
@@ -749,7 +770,10 @@ summary({
   'Swift demangler': get_option('use_swift_demangler'),
   'Debugger enabled': has_debugger,
   'Capstone version': capstone_dep.version(),
+  'PCRE2 version': pcre2_dep.version(),
+  'PCRE2 JIT': pcre2_jit_supported,
   'System magic library': sys_magic.found() and sys_magic.type_name() != 'internal',
+  'System pcre2 library': pcre2_dep.found() and pcre2_dep.type_name() != 'internal',
   'System xxhash library': xxhash_dep.found() and xxhash_dep.type_name() != 'internal',
   'System libmspack library': libmspack_dep.found() and libmspack_dep.type_name() != 'internal',
   'System openssl library': sys_openssl.found() and sys_openssl.type_name() != 'internal',
diff --git a/meson_options.txt b/meson_options.txt
index e51701f7686..7abaf40e1ab 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -34,6 +34,7 @@ option('use_sys_xxhash', type: 'feature', value: 'disabled')
 option('use_sys_openssl', type: 'feature', value: 'disabled')
 option('use_sys_libmspack', type: 'feature', value: 'disabled')
 option('use_sys_tree_sitter', type: 'feature', value: 'disabled')
+option('use_sys_pcre2', type: 'feature', value: 'disabled')
 option('use_swift_demangler', type: 'boolean', value: true, description: 'If false, disables the swift demangler')
 option('use_gpl', type: 'boolean', value: true, description: 'Set to false when you want to disable gpl code')
 option('install_sigdb', type: 'boolean', value: false, description: 'Downloads and installs rizin sigdb')
diff --git a/subprojects/packagefiles/pcre2/meson.build b/subprojects/packagefiles/pcre2/meson.build
new file mode 100644
index 00000000000..1fcc5752388
--- /dev/null
+++ b/subprojects/packagefiles/pcre2/meson.build
@@ -0,0 +1,85 @@
+project('pcre2', 'c', version: '10.42')
+
+cc = meson.get_compiler('c')
+
+conf_data = configuration_data()
+
+pcre2_chartables = configure_file(input : 'src/pcre2_chartables.c.dist',
+  output : 'pcre2_chartables.c',
+  configuration : conf_data)
+
+pcre2_h = configure_file(input : 'src/pcre2.h.generic',
+  output : 'pcre2.h',
+  configuration : conf_data)
+
+config_h = configure_file(input : 'src/config.h.generic',
+  output : 'config.h',
+  configuration : conf_data)
+
+libpcre2_c_args = [
+  '-DHAVE_CONFIG_H', # Default values from config.h
+  '-DPCRE2_CODE_UNIT_WIDTH=8',
+  '-DHAVE_MEMMOVE',
+  '-DSUPPORT_PCRE2_8',
+  '-DSUPPORT_UNICODE',
+  '-fvisibility=default',
+]
+
+pcre2_files = [
+  'src/pcre2_auto_possess.c',
+  pcre2_chartables,
+  'src/pcre2_compile.c',
+  'src/pcre2_config.c',
+  'src/pcre2_context.c',
+  'src/pcre2_convert.c',
+  'src/pcre2_dfa_match.c',
+  'src/pcre2_error.c',
+  'src/pcre2_extuni.c',
+  'src/pcre2_find_bracket.c',
+  'src/pcre2_maketables.c',
+  'src/pcre2_match.c',
+  'src/pcre2_match_data.c',
+  'src/pcre2_newline.c',
+  'src/pcre2_ord2utf.c',
+  'src/pcre2_pattern_info.c',
+  'src/pcre2_script_run.c',
+  'src/pcre2_serialize.c',
+  'src/pcre2_string_utils.c',
+  'src/pcre2_study.c',
+  'src/pcre2_substitute.c',
+  'src/pcre2_substring.c',
+  'src/pcre2_tables.c',
+  'src/pcre2_ucd.c',
+  'src/pcre2_valid_utf.c',
+  'src/pcre2_xclass.c',
+]
+
+cpu_jit_supported = [ 'aarch64', 'arm', 'mips', 'mips64', 'ppc', 'ppc64', 'riscv32', 'riscv64', 's390x', 'x86', 'x86_64' ]
+
+# tcc doesn't support the MSVC asm syntax PCRE2 uses (`__asm { ... }`).
+# It is used in the JIT compiler code.
+if cc.get_id() != 'tcc' and target_machine.cpu_family() in cpu_jit_supported
+  libpcre2_c_args += ['-DSUPPORT_JIT']
+  pcre2_files += ['src/pcre2_jit_compile.c']
+endif
+
+if target_machine.system() == 'openbsd' or target_machine.system() == 'netbsd'
+  # JIT compilation fails with "no more memory" on these systems, which presumably refuse memory that is writable and executable at once; use sljit's W^X allocator.
+  libpcre2_c_args += ['-DSLJIT_WX_EXECUTABLE_ALLOCATOR']
+endif
+
+pcre2_includes = [
+  include_directories('.'),
+  include_directories('src/'),
+]
+
+libpcre2 = static_library('pcre2', pcre2_files,
+  c_args: libpcre2_c_args,
+  include_directories: pcre2_includes,
+  install: false,
+)
+
+pcre2_dep = declare_dependency(
+  link_with: libpcre2,
+  include_directories: pcre2_includes
+)
diff --git a/subprojects/pcre2.wrap b/subprojects/pcre2.wrap
new file mode 100644
index 00000000000..2bfaf19a850
--- /dev/null
+++ b/subprojects/pcre2.wrap
@@ -0,0 +1,8 @@
+[wrap-git]
+url = https://github.com/PCRE2Project/pcre2.git
+revision = 52c08847921a324c804cabf2814549f50bce1265
+directory = pcre2
+patch_directory = pcre2
+
+[provide]
+pcre2=pcre2_dep
diff --git a/test/db/archos/darwin-arm64/dbg b/test/db/archos/darwin-arm64/dbg
index 21e2b25dae7..4c444491c7d 100644
--- a/test/db/archos/darwin-arm64/dbg
+++ b/test/db/archos/darwin-arm64/dbg
@@ -31,10 +31,14 @@ stur w0, [fp, -4]
 stur x1, [fp, -0x10]
 str x8, [sp, 0x10]
 EOF
-REGEXP_FILTER_ERR=(([a-zA-Z:]+|[0-9a-f][0-9a-f][0-9a-f])\s+)
+REGEXP_FILTER_ERR=(((Continue\suntil)|(hit\sbreakpoint\sat:)|[0-9a-f][0-9a-f][0-9a-f]\n))
 EXPECT_ERR=<<EOF
-Continue until c88
-hit breakpoint at: c88
+Continue until
+c88
+
+hit breakpoint at:
+c88
+
 EOF
 RUN
 
@@ -46,10 +50,37 @@ dm~hello~[3-9]
 EOF
 REGEXP_FILTER_OUT=([a-zA-Z0-9_-]+[\.rwx_-]*\s+)
 EXPECT=<<EOF
-- usr 16K u r-x hello-macos-arm64 hello-macos-arm64
-- usr 16K u rw- hello-macos-arm64 hello-macos-arm64
-- usr 16K u rw- hello-macos-arm64 hello-macos-arm64
-- usr 16K u r-- hello-macos-arm64 hello-macos-arm64
+- 
+usr 
+16K 
+u 
+r-x 
+hello-macos-arm64 
+hello-macos-arm64
+
+- 
+usr 
+16K 
+u 
+rw- 
+hello-macos-arm64 
+hello-macos-arm64
+
+- 
+usr 
+16K 
+u 
+rw- 
+hello-macos-arm64 
+hello-macos-arm64
+
+- 
+usr 
+16K 
+u 
+r-- 
+hello-macos-arm64 
+hello-macos-arm64
 EOF
 RUN
 
@@ -61,7 +92,7 @@ fl@F:maps~hello~[1-]
 echo --
 dm*~hello~[0-2]
 EOF
-REGEXP_FILTER_OUT=([0-9]+ [a-zA-Z0-9_-]+\.[\.rwx_]*|[f-].+|\n)
+REGEXP_FILTER_OUT=([0-9]+ [a-zA-Z0-9_-]+\.[\.rwx_]*|[f-].+)
 EXPECT=<<EOF
 16384 hello_macos_arm64.r_x
 16384 hello_macos_arm64.rw
@@ -88,18 +119,23 @@ ds
 dr pc
 dr x8
 EOF
-REGEXP_FILTER_OUT=(([a-zA-Z:=-]+|[0-9a-f][0-9a-f][0-9a-f]|x[0-9]+ = [a-z0-9]+)\s+)
+REGEXP_FILTER_OUT=((x8\s=\s0x.*)|(pc\s=)|(f44)|(f48))
 EXPECT=<<EOF
-pc = f44
+pc =
+f44
 x8 = 0x0000000000000000
---
-pc = f48
+pc =
+f48
 x8 = 0x0000000000000064
 EOF
-REGEXP_FILTER_ERR=(([a-zA-Z:]+|[0-9a-f][0-9a-f][0-9a-f])\s+)
+REGEXP_FILTER_ERR=(((Continue\suntil)|(hit\sbreakpoint\sat:)|[0-9a-f][0-9a-f][0-9a-f]\n))
 EXPECT_ERR=<<EOF
-Continue until f30
-hit breakpoint at: f30
+Continue until
+f30
+
+hit breakpoint at:
+f30
+
 EOF
 RUN
 
@@ -121,21 +157,28 @@ dc
 dr pc
 dr x8
 EOF
-REGEXP_FILTER_OUT=(([a-zA-Z:=-]+|[0-9a-f][0-9a-f][0-9a-f]|x[0-9]+ = [a-z0-9]+)\s+)
+REGEXP_FILTER_OUT=((x8\s=\s0x.*)|(pc\s=)|(f30)|(f44)|(f48))
 EXPECT=<<EOF
-pc = f30
---
-pc = f44
+pc =
+f30
+pc =
+f44
 x8 = 0x0000000000000000
---
-pc = f48
+pc =
+f48
 x8 = 0x0000000000000064
 EOF
-REGEXP_FILTER_ERR=(([a-zA-Z:]+|[0-9a-f][0-9a-f][0-9a-f])\s+)
+REGEXP_FILTER_ERR=(((hit\sbreakpoint\sat:)|[0-9a-f][0-9a-f][0-9a-f]\n))
 EXPECT_ERR=<<EOF
-hit breakpoint at: f30
-hit breakpoint at: f44
-hit breakpoint at: f48
+hit breakpoint at:
+f30
+
+hit breakpoint at:
+f44
+
+hit breakpoint at:
+f48
+
 EOF
 RUN
 
@@ -153,12 +196,10 @@ EXPECT=<<EOF
 x8 = 0x000000000000002a
 Result = 1337
 EOF
-REGEXP_FILTER_ERR=(([a-zA-Z:]+|[0-9a-f][0-9a-f][0-9a-f])\s+)
+REGEXP_FILTER_ERR=((hit\sbreakpoint\sat:)|(f70\n))
 EXPECT_ERR=<<EOF
-hit breakpoint at: f70
-
-Process finished
-
+hit breakpoint at:
+f70
 EOF
 RUN
 
@@ -180,20 +221,27 @@ dc
 dr pc
 dr x8
 EOF
-REGEXP_FILTER_OUT=(([a-zA-Z:=-]+|[0-9a-f][0-9a-f][0-9a-f]|x[0-9]+ = [a-z0-9]+)\s+)
+REGEXP_FILTER_OUT=((x8\s=\s0x.*)|(pc\s=)|(f30)|(f44)|(f48))
 EXPECT=<<EOF
-pc = f30
---
-pc = f44
+pc =
+f30
+pc =
+f44
 x8 = 0x0000000000000000
---
-pc = f48
+pc =
+f48
 x8 = 0x0000000000000064
 EOF
-REGEXP_FILTER_ERR=(([a-zA-Z:]+|[0-9a-f][0-9a-f][0-9a-f])\s+)
+REGEXP_FILTER_ERR=(((hit\sbreakpoint\sat:)|[0-9a-f][0-9a-f][0-9a-f]\n))
 EXPECT_ERR=<<EOF
-hit breakpoint at: f30
-hit breakpoint at: f44
-hit breakpoint at: f48
+hit breakpoint at:
+f30
+
+hit breakpoint at:
+f44
+
+hit breakpoint at:
+f48
+
 EOF
 RUN
diff --git a/test/db/archos/darwin-x64/dbg b/test/db/archos/darwin-x64/dbg
index 4dc048c6817..a855fefe18a 100644
--- a/test/db/archos/darwin-x64/dbg
+++ b/test/db/archos/darwin-x64/dbg
@@ -44,11 +44,10 @@ ARGS=-d
 CMDS=<<EOF
 dm~hello
 EOF
-REGEXP_FILTER_OUT=([a-zA-Z0-9_\.-]+\s+)
 EXPECT=<<EOF
-0x0000000100000000 - 0x0000000100001000 - usr     4K u r-x hello-objc-osx hello-objc-osx hello_objc_osx.r_x
-0x0000000100001000 - 0x0000000100002000 - usr     4K u rw- hello-objc-osx hello-objc-osx hello_objc_osx.rw
-0x0000000100002000 - 0x0000000100003000 - usr     4K u r-- hello-objc-osx hello-objc-osx hello_objc_osx.r
+0x0000000100000000 - 0x0000000100001000 - usr     4K u r-x hello-objc-osx /Users/runner/work/rizin/rizin/test/bins/mach0/hello-objc-osx ; hello_objc_osx.r_x
+0x0000000100001000 - 0x0000000100002000 - usr     4K u rw- hello-objc-osx /Users/runner/work/rizin/rizin/test/bins/mach0/hello-objc-osx ; hello_objc_osx.rw
+0x0000000100002000 - 0x0000000100003000 - usr     4K u r-- hello-objc-osx /Users/runner/work/rizin/rizin/test/bins/mach0/hello-objc-osx ; hello_objc_osx.r
 EOF
 EXPECT_ERR=
 RUN
@@ -139,21 +138,36 @@ dc
 dr rip
 dr rax
 EOF
-REGEXP_FILTER_OUT=(([a-zA-Z:=-]+|[0-9a-f][0-9a-f][0-9a-f]|r[^i][a-z]+ = [a-z0-9]+)\s+)
+# Due to ASLR, only the last 12 bits are stable and get tested here.
+REGEXP_FILTER_OUT=((r[ai][xp])|([0-9a-f][0-9a-f][0-9a-f]\n))
 EXPECT=<<EOF
-rip = f20
---
-rip = f34
-rax = 0x0000000000000000
---
-rip = f38
-rax = 0x0000000000000064
+rip
+f20
+
+rip
+f34
+
+rax
+000
+
+rip
+f38
+
+rax
+064
+
 EOF
-REGEXP_FILTER_ERR=(([a-zA-Z:]+|[0-9a-f][0-9a-f][0-9a-f])\s+)
+REGEXP_FILTER_ERR=((hit\sbreakpoint\sat:)|([0-9a-f][0-9a-f][0-9a-f]\n))
 EXPECT_ERR=<<EOF
-hit breakpoint at: f20
-hit breakpoint at: f34
-hit breakpoint at: f38
+hit breakpoint at:
+f20
+
+hit breakpoint at:
+f34
+
+hit breakpoint at:
+f38
+
 EOF
 RUN
 
@@ -175,20 +189,34 @@ dc
 dr rip
 dr rax
 EOF
-REGEXP_FILTER_OUT=(([a-zA-Z:=-]+|[0-9a-f][0-9a-f][0-9a-f]|r[^i][a-z]+ = [a-z0-9]+)\s+)
+REGEXP_FILTER_OUT=((r[ia][px])|([0-9a-f][0-9a-f][0-9a-f]\n))
 EXPECT=<<EOF
-rip = ea0
---
-rip = edd
-rax = 0x0000000000000190
---
-rip = ee3
-rax = 0x00000000000000a0
+rip
+ea0
+
+rip
+edd
+
+rax
+190
+
+rip
+ee3
+
+rax
+0a0
+
 EOF
-REGEXP_FILTER_ERR=(([a-zA-Z:]+|[0-9a-f][0-9a-f][0-9a-f])\s+)
+REGEXP_FILTER_ERR=((hit\sbreakpoint\sat:)|([0-9a-f][0-9a-f][0-9a-f]\n))
 EXPECT_ERR=<<EOF
-hit breakpoint at: ea0
-hit breakpoint at: edd
-hit breakpoint at: ee3
+hit breakpoint at:
+ea0
+
+hit breakpoint at:
+edd
+
+hit breakpoint at:
+ee3
+
 EOF
 RUN
diff --git a/test/db/archos/linux-x64/dbg_dmh b/test/db/archos/linux-x64/dbg_dmh
index bcd5a8b24aa..264584c3bfb 100644
--- a/test/db/archos/linux-x64/dbg_dmh
+++ b/test/db/archos/linux-x64/dbg_dmh
@@ -24,14 +24,18 @@ echo ----
 7dso
 dmh | tail -n 3
 EOF
-REGEXP_FILTER_OUT=(status=[a-z,[0]+)|(size=0x[a-f0-9]+)|-+|\n|[()]|Chunk
+REGEXP_FILTER_OUT=(status=[a-z]+)|(size=0x[a-f0-9]+)
 EXPECT=<<EOF
-Chunk(status=allocated,size=0x12010)
-Chunk(status=free,size=0xed60)
-----
-Chunk(status=allocated,size=0x12010)
-Chunk(status=allocated,size=0x20)
-Chunk(status=free,size=0xed40)
+status=allocated
+size=0x12010
+status=free
+size=0xed60
+status=allocated
+size=0x12010
+status=allocated
+size=0x20
+status=free
+size=0xed40
 EOF
 RUN
 
diff --git a/test/db/archos/linux-x64/dbg_oo b/test/db/archos/linux-x64/dbg_oo
index 93b46f624bb..ebe44077ceb 100644
--- a/test/db/archos/linux-x64/dbg_oo
+++ b/test/db/archos/linux-x64/dbg_oo
@@ -112,8 +112,9 @@ ARGS=-e log.level=4
 CMDS=<<EOF
 ood
 EOF
-REGEXP_FILTER_ERR=([a-zA-Z-]+\s+)
+REGEXP_FILTER_ERR=(Process\swith\sPID)|(helloworld-gcc\sreopened\sin\sread-write\smode)
 EXPECT_ERR=<<EOF
-Process with PID File -helloworld-gcc reopened in read-write mode
+Process with PID
+helloworld-gcc reopened in read-write mode
 EOF
 RUN
diff --git a/test/db/archos/linux-x64/dbg_step b/test/db/archos/linux-x64/dbg_step
index 55f509eb8a1..6c77c14b764 100644
--- a/test/db/archos/linux-x64/dbg_step
+++ b/test/db/archos/linux-x64/dbg_step
@@ -275,9 +275,7 @@ EOF
 EXPECT=<<EOF
 0x400574
 EOF
-REGEXP_FILTER_ERR=<<EOF
-Continue[ a-zA-Z0-9]+
-EOF
+REGEXP_FILTER_ERR=(Continue\suntil\s[a-zA-Z0-9]+)
 EXPECT_ERR=<<EOF
 Continue until 0x00400574
 EOF
diff --git a/test/db/archos/linux-x64/dbg_trace b/test/db/archos/linux-x64/dbg_trace
index 0aa0ab90afc..9e55d75b0d7 100644
--- a/test/db/archos/linux-x64/dbg_trace
+++ b/test/db/archos/linux-x64/dbg_trace
@@ -61,10 +61,10 @@ echo ----
 dsui call; ds
 dbt~[6-]  # dbtt here would be nice
 EOF
-REGEXP_FILTER_OUT=(loc\.[^_]\S*.|main\S*.|entry0\+\d+\s+|-+\n)
+REGEXP_FILTER_OUT=((loc\.[^_]\S*)|(main\s?\S*)|(entry0\+\d+)|(----))
 EXPECT=<<EOF
 main loc.func_6+6
-entry0+41 
+entry0+41
 ----
 loc.func_0
 main+8
diff --git a/test/db/archos/windows-x64/dbg_dts b/test/db/archos/windows-x64/dbg_dts
index badccca0275..d4c9ce7c9c7 100644
--- a/test/db/archos/windows-x64/dbg_dts
+++ b/test/db/archos/windows-x64/dbg_dts
@@ -17,7 +17,7 @@ dr rflags
 dr rax
 doc
 EOF
-REGEXP_FILTER_OUT=(0x[0-9A-Za-z]+\n)
+REGEXP_FILTER_OUT=(0x[0-9A-Za-z]+)
 EXPECT=<<EOF
 0x0000000000000200
 0x0000000000000001
diff --git a/test/db/cmd/cmd_http_post b/test/db/cmd/cmd_http_post
index c890107cce1..3ee5bd30058 100644
--- a/test/db/cmd/cmd_http_post
+++ b/test/db/cmd/cmd_http_post
@@ -7,4 +7,4 @@ REGEXP_FILTER_OUT=(Test succ.+)
 EXPECT=<<EOF
 Test succeeded
 EOF
-RUN
\ No newline at end of file
+RUN
diff --git a/test/db/cmd/cmd_pd2 b/test/db/cmd/cmd_pd2
index 26019759d81..10624735add 100644
--- a/test/db/cmd/cmd_pd2
+++ b/test/db/cmd/cmd_pd2
@@ -667,7 +667,7 @@ EXPECT=<<EOF
        [36m|[0m[36m|[0m   [32m0x00004214[0m      [1;92mcall[0m[37m  [0m[33m0x13c50[0m[0m[0m
        [36m|[0m[36m`[0m[36m-[0m[36m>[0m [32m0x0000421a[0m      [37mmov[0m[37m   [0m[37mqword[0m[37m [[0m[33m0x000232b0[0m[37m], [0m[33m0x50[0m[0m[31m             ; 'P'
        [36m|[0m    [31m                                                           ; [0x232b0:8]=0[0m
-       [36m|[0m    [32m0x00004225[0m      [37mmov[0m[37m   [0m[36mrax[0m[37m, [0m[37mqword[0m[37m [33mstr.COLUMNS[0m[0m[0m[31m               ; [[31m0x18a22[31m:8]=0x534e4d554c4f43[31m ; "COLUMNS"[0m
+       [36m|[0m    [32m0x00004225[0m      [37mmov[0m[37m   [0m[36mrax[0m[37m, [0m[37mqword[0m[37m [[0m[33mstr.COLUMNS[0m[37m][0m[0m[31m             ; [[31m0x18a22[31m:8]=0x534e4d554c4f43[31m ; "COLUMNS"[0m
 
             [32m0x000041ee[0m      [37mmov[0m[37m   [0m[36mrax[0m[37m, [0m[37mqword[0m[37m [[0m[36mrip[0m[37m [0m[37m+[0m[37m [0m[33m0x1d84b[0m[37m][0m[0m[31m           ; [[31m0x21a40[31m:8]=0x18d31 str.literal[0m
             [32m0x00004225[0m      [37mmov[0m[37m   [0m[36mrax[0m[37m, [0m[37mqword[0m[37m [[0m[36mrip[0m[37m [0m[37m+[0m[37m [0m[33m0x147f6[0m[37m][0m[0m[31m           ; str.COLUMNS
@@ -709,7 +709,7 @@ EXPECT=<<EOF
        [36m|[0m[36m|[0m   [32m0x00004214[0m      [1;92mcall[0m[37m  [0m[33m0x13c50[0m[0m[0m
        [36m|[0m[36m`[0m[36m-[0m[36m>[0m [32m0x0000421a[0m      [37mmov[0m[37m   [0m[37mqword[0m[37m [[0m[33m0x000232b0[0m[37m], [0m[33m0x50[0m[0m[31m             ; 'P'
        [36m|[0m    [31m                                                           ; [0x232b0:8]=0[0m
-       [36m|[0m    [32m0x00004225[0m      [36mcmp[0m[37m   [0m[36mrdi[0m[37m, [0m[37mqword[0m[37m [33mstr.COLUMNS[0m[0m[0m[31m               ; [[31m0x18a22[31m:8]=0x534e4d554c4f43[31m ; "COLUMNS"[0m
+       [36m|[0m    [32m0x00004225[0m      [36mcmp[0m[37m   [0m[36mrdi[0m[37m, [0m[37mqword[0m[37m [[0m[33mstr.COLUMNS[0m[37m][0m[0m[31m             ; [[31m0x18a22[31m:8]=0x534e4d554c4f43[31m ; "COLUMNS"[0m
 
             [32m0x000041ee[0m      [36mcmp[0m[37m   [0m[36mrsi[0m[37m, [0m[37mqword[0m[37m [[0m[36mrip[0m[37m [0m[37m+[0m[37m [0m[33m0x1d84b[0m[37m][0m[0m[31m           ; [[31m0x21a40[31m:8]=0x18d31 str.literal[0m
             [32m0x00004225[0m      [36mcmp[0m[37m   [0m[36mrdi[0m[37m, [0m[37mqword[0m[37m [[0m[36mrip[0m[37m [0m[37m+[0m[37m [0m[33m0x147f6[0m[37m][0m[0m[31m           ; str.COLUMNS
diff --git a/test/db/formats/pdb b/test/db/formats/pdb
index 747aca9a671..4863eb96546 100644
--- a/test/db/formats/pdb
+++ b/test/db/formats/pdb
@@ -531,7 +531,7 @@ mkdir .tmp
 idpx bins/pdb/basic32.pd_ .tmp
 !rz-hash -a md5 .tmp/basic32.pdb
 EOF
-REGEXP_FILTER_ERR=(ERROR:.+\nINFO:.+\n)
+REGEXP_FILTER_ERR=(ERROR:.+\nINFO:.+)
 EXPECT_ERR=<<EOF
 ERROR: The pdb file bins/pdb/basic32.pd_ seems to be compressed, please use idpx command to extract the contents.
 INFO: cab_extract: extracted .tmp/basic32.pdb
diff --git a/test/db/formats/pyc b/test/db/formats/pyc
index 211feb595c5..ed14932db43 100644
--- a/test/db/formats/pyc
+++ b/test/db/formats/pyc
@@ -7,7 +7,7 @@ EXPECT=<<EOF
 machine  Python v3.10.0 VM (rev bfb376ffcc4260feb9bf1b9a110559b1ff31da80)
 EOF
 REGEXP_FILTER_ERR=<<EOF
-free_object\ \(0\)
+free_object\s\(0\)
 EOF
 EXPECT_ERR=<<EOF
 free_object (0)
@@ -23,7 +23,7 @@ EXPECT=<<EOF
 machine  Python v3.9.0 VM (rev 3a819e5c9d3feae85762ae87cebf55f9c8d2b526)
 EOF
 REGEXP_FILTER_ERR=<<EOF
-free_object\ \(0\)
+free_object\s\(0\)
 EOF
 EXPECT_ERR=<<EOF
 free_object (0)
@@ -39,7 +39,7 @@ EXPECT=<<EOF
 machine  Python v3.8.0 VM (rev 5d714034866ce1e9f89dc141fe4cc0b50cf20a8e)
 EOF
 REGEXP_FILTER_ERR=<<EOF
-free_object\ \(0\)
+free_object\s\(0\)
 EOF
 EXPECT_ERR=<<EOF
 free_object (0)
@@ -55,7 +55,7 @@ EXPECT=<<EOF
 machine  Python v3.7.0 VM (rev ae1f6af15f3e4110616801e235873e47fd7d1977)
 EOF
 REGEXP_FILTER_ERR=<<EOF
-free_object\ \(0\)
+free_object\s\(0\)
 EOF
 EXPECT_ERR=<<EOF
 free_object (0)
@@ -71,7 +71,7 @@ EXPECT=<<EOF
 machine  Python v3.6.0 VM (rev 5c4568a05a0a62b5947c55f68f9f2ecfb90a4f12)
 EOF
 REGEXP_FILTER_ERR=<<EOF
-free_object\ \(0\)
+free_object\s\(0\)
 EOF
 EXPECT_ERR=<<EOF
 free_object (0)
@@ -87,7 +87,7 @@ EXPECT=<<EOF
 machine  Python 2.7a2+ VM (rev edfed0e32cedf3b84c6e999052486a750a3f5bee)
 EOF
 REGEXP_FILTER_ERR=<<EOF
-free_object\ \(0\)
+free_object\s\(0\)
 EOF
 EXPECT_ERR=<<EOF
 free_object (0)
@@ -103,7 +103,7 @@ EXPECT=<<EOF
 9
 EOF
 REGEXP_FILTER_ERR=<<EOF
-free_object\ \(0\)
+free_object\s\(0\)
 EOF
 EXPECT_ERR=<<EOF
 free_object (0)
@@ -125,7 +125,7 @@ EXPECT=<<EOF
 0x00002356 0x0   0x00002356 0x0   0x0   ---- module_.Batman.sing             
 EOF
 REGEXP_FILTER_ERR=<<EOF
-free_object\ \(0\)
+free_object\s\(0\)
 EOF
 EXPECT_ERR=<<EOF
 free_object (0)
@@ -147,7 +147,7 @@ EXPECT=<<EOF
 0x00002326 0x4   0x00002326 0x4   0x0   ---- module_.Batman.sing             
 EOF
 REGEXP_FILTER_ERR=<<EOF
-free_object\ \(0\)
+free_object\s\(0\)
 EOF
 EXPECT_ERR=<<EOF
 free_object (0)
@@ -169,7 +169,7 @@ EXPECT=<<EOF
 0x00002235 0x4   0x00002235 0x4   0x0   ---- module_.Batman.sing             
 EOF
 REGEXP_FILTER_ERR=<<EOF
-free_object\ \(0\)
+free_object\s\(0\)
 EOF
 EXPECT_ERR=<<EOF
 free_object (0)
@@ -195,7 +195,7 @@ EXPECT=<<EOF
 0x0000002a 0x0000002a ---------- ---------- program
 EOF
 REGEXP_FILTER_ERR=<<EOF
-free_object\ \(0\)
+free_object\s\(0\)
 EOF
 EXPECT_ERR=<<EOF
 free_object (0)
@@ -225,7 +225,7 @@ EXPECT=<<EOF
             0x00000040      NOP
 EOF
 REGEXP_FILTER_ERR=<<EOF
-free_object\ \(0\)
+free_object\s\(0\)
 EOF
 EXPECT_ERR=<<EOF
 free_object (0)
@@ -259,7 +259,7 @@ EXPECT=<<EOF
             0x00001ed1      RETURN_VALUE
 EOF
 REGEXP_FILTER_ERR=<<EOF
-free_object\ \(0\)
+free_object\s\(0\)
 EOF
 EXPECT_ERR=<<EOF
 free_object (0)
@@ -289,7 +289,7 @@ EXPECT=<<EOF
         `-> 0x00000040      POP_TOP
 EOF
 REGEXP_FILTER_ERR=<<EOF
-free_object\ \(0\)
+free_object\s\(0\)
 EOF
 EXPECT_ERR=<<EOF
 free_object (0)
@@ -323,7 +323,7 @@ EXPECT=<<EOF
             0x00001e97      RETURN_VALUE
 EOF
 REGEXP_FILTER_ERR=<<EOF
-free_object\ \(0\)
+free_object\s\(0\)
 EOF
 EXPECT_ERR=<<EOF
 free_object (0)
@@ -353,7 +353,7 @@ EXPECT=<<EOF
         `-> 0x0000003c      POP_TOP
 EOF
 REGEXP_FILTER_ERR=<<EOF
-free_object\ \(0\)
+free_object\s\(0\)
 EOF
 EXPECT_ERR=<<EOF
 free_object (0)
@@ -369,7 +369,7 @@ EXPECT=<<EOF
 1   0x00000052 0x00000052 NONE FUNC 9        <module>.hello_world
 EOF
 REGEXP_FILTER_ERR=<<EOF
-free_object\ \(0\)
+free_object\s\(0\)
 EOF
 EXPECT_ERR=<<EOF
 free_object (0)
@@ -397,7 +397,7 @@ EXPECT=<<EOF
             0x00000033      LOAD_CONST            None
 EOF
 REGEXP_FILTER_ERR=<<EOF
-free_object\ \(0\)
+free_object\s\(0\)
 EOF
 EXPECT_ERR=<<EOF
 free_object (0)
@@ -428,7 +428,7 @@ EXPECT=<<EOF
 \           0x00000036      RETURN_VALUE
 EOF
 REGEXP_FILTER_ERR=<<EOF
-free_object\ \(0\)
+free_object\s\(0\)
 EOF
 EXPECT_ERR=<<EOF
 free_object (0)
diff --git a/test/unit/test_regex.c b/test/unit/test_regex.c
index fcb450e5b27..7534b7b88c2 100644
--- a/test/unit/test_regex.c
+++ b/test/unit/test_regex.c
@@ -2,50 +2,107 @@
 // SPDX-FileCopyrightText: 2022 GustavoLCR <gugulcr@gmail.com>
 // SPDX-License-Identifier: LGPL-3.0-only
 
-#include <rz_regex.h>
 #include "minunit.h"
+#include <rz_util/rz_regex.h>
+#include <rz_util/rz_strbuf.h>
+#include <rz_util/rz_str.h>
+#include <rz_vector.h>
 
-bool exec_regex(RzRegex *regex, const char *str, RzRegexMatch *out) {
-	RzRegexMatch match[2];
-	mu_assert_true(rz_regex_exec(regex, str, 1, &match[0], 0) == 0, "Regex match failed");
-	mu_assert_true(rz_regex_exec(regex, str, 1, &match[1], RZ_REGEX_LARGE) == 0, "Regex match failed for large engine");
-	mu_assert_memeq((ut8 *)&match[0], (ut8 *)&match[1], sizeof(RzRegexMatch), "Results from large engine match does not equal small engine match");
-	*out = match[0];
+bool exec_regex(RzRegex *regex, const char *str, RzRegexMatch **out) {
+	RzPVector *matches = rz_regex_match_all_not_grouped(regex, str, RZ_REGEX_ZERO_TERMINATED, 0, RZ_REGEX_DEFAULT);
+	if (!matches || rz_pvector_empty(matches)) {
+		return false;
+	}
+	*out = (RzRegexMatch *)rz_pvector_at(matches, 0);
 	return true;
 }
 
+bool test_rz_regex_all_match(void) {
+	RzRegex *reg = rz_regex_new("push", RZ_REGEX_EXTENDED, 0);
+	mu_assert_notnull(reg, "Regex was NULL");
+	RzRegexMatch *match = NULL;
+	mu_assert_true(exec_regex(reg, "push", &match), "Regex match failed");
+	mu_assert_notnull(match, "match was not set");
+	mu_assert_eq(match->start, 0, "Start of match is not 0");
+	mu_assert_eq(match->len, 4, "Len of match is not 4");
+	rz_regex_free(reg);
+	mu_end;
+}
+
+bool test_rz_regex_extend_space(void) {
+	RzRegex *reg = rz_regex_new("push esi", RZ_REGEX_DEFAULT, 0);
+	mu_assert_notnull(reg, "Regex was NULL");
+	RzRegexMatch *match = NULL;
+	mu_assert_notnull(reg, "Regex was NULL");
+	mu_assert_true(exec_regex(reg, "push esi", &match), "Regex match failed. Was ' ' replaced with \\s in the pattern?");
+	mu_assert_notnull(match, "match was not set");
+	mu_assert_eq(match->start, 0, "Start of match is not 0");
+	mu_assert_eq(match->len, 8, "Len of match is not 8");
+	rz_regex_free(reg);
+	mu_end;
+}
+
+bool test_rz_regex_all_to_str(void) {
+	RzRegex *reg = rz_regex_new("123", RZ_REGEX_EXTENDED, 0);
+	mu_assert_notnull(reg, "Regex was NULL");
+	RzStrBuf *res = rz_regex_full_match_str("(123)", "123 123 123", RZ_REGEX_ZERO_TERMINATED, RZ_REGEX_DEFAULT, RZ_REGEX_DEFAULT, "\n");
+	char *str = rz_strbuf_drain(res);
+	mu_assert_streq(str, "123\n123\n123", "String match failed.");
+	free(str);
+
+	res = rz_regex_full_match_str("(123)", "123", RZ_REGEX_ZERO_TERMINATED, RZ_REGEX_DEFAULT, RZ_REGEX_DEFAULT, "\n");
+	str = rz_strbuf_drain(res);
+	mu_assert_streq(str, "123", "String match failed.");
+	free(str);
+
+	res = rz_regex_full_match_str("(123)", "", RZ_REGEX_ZERO_TERMINATED, RZ_REGEX_DEFAULT, RZ_REGEX_DEFAULT, "\n");
+	str = rz_strbuf_drain(res);
+	mu_assert_streq(str, "", "String match failed.");
+	free(str);
+	rz_regex_free(reg);
+	mu_end;
+}
+
 bool test_rz_reg_exec(void) {
 	const char *p = "abc|123";
-	RzRegex *reg = rz_regex_new(p, "e");
+	RzRegex *reg = rz_regex_new(p, RZ_REGEX_EXTENDED, 0);
 	mu_assert_notnull(reg, "Regex was NULL");
-	RzRegexMatch match;
+	RzRegexMatch *match = NULL;
 	mu_assert_true(exec_regex(reg, "abc", &match), "Regex match failed");
-	mu_assert_eq(match.rm_so, 0, "Start of match is not 0");
-	mu_assert_eq(match.rm_eo, 3, "Start of match is not 3");
+	mu_assert_notnull(match, "match was not set");
+	mu_assert_eq(match->start, 0, "Start of match is not 0");
+	mu_assert_eq(match->len, 3, "Len of match is not 3");
 	mu_assert_true(exec_regex(reg, "zabc", &match), "Regex match failed");
-	mu_assert_eq(match.rm_so, 1, "Start of match is not 1");
-	mu_assert_eq(match.rm_eo, 4, "Start of match is not 4");
+	mu_assert_notnull(match, "match was not set");
+	mu_assert_eq(match->start, 1, "Start of match is not 1");
+	mu_assert_eq(match->len, 3, "Len of match is not 3");
 	mu_assert_true(exec_regex(reg, "abcz", &match), "Regex match failed");
-	mu_assert_eq(match.rm_so, 0, "Start of match is not 0");
-	mu_assert_eq(match.rm_eo, 3, "Start of match is not 3");
+	mu_assert_notnull(match, "match was not set");
+	mu_assert_eq(match->start, 0, "Start of match is not 0");
+	mu_assert_eq(match->len, 3, "Len of match is not 3");
 	mu_assert_true(exec_regex(reg, "123", &match), "Regex match failed");
-	mu_assert_eq(match.rm_so, 0, "Start of match is not 0");
-	mu_assert_eq(match.rm_eo, 3, "Start of match is not 3");
+	mu_assert_notnull(match, "match was not set");
+	mu_assert_eq(match->start, 0, "Start of match is not 0");
+	mu_assert_eq(match->len, 3, "Len of match is not 3");
 	mu_assert_true(exec_regex(reg, "z123", &match), "Regex match failed");
-	mu_assert_eq(match.rm_so, 1, "Start of match is not 1");
-	mu_assert_eq(match.rm_eo, 4, "Start of match is not 4");
+	mu_assert_notnull(match, "match was not set");
+	mu_assert_eq(match->start, 1, "Start of match is not 1");
+	mu_assert_eq(match->len, 3, "Len of match is not 3");
 	mu_assert_true(exec_regex(reg, "123z", &match), "Regex match failed");
-	mu_assert_eq(match.rm_so, 0, "Start of match is not 0");
-	mu_assert_eq(match.rm_eo, 3, "Start of match is not 3");
+	mu_assert_notnull(match, "match was not set");
+	mu_assert_eq(match->start, 0, "Start of match is not 0");
+	mu_assert_eq(match->len, 3, "Len of match is not 3");
 	rz_regex_free(reg);
 	const char *p_big = "\\d+(([abc]*d[efg])+|[123]4[567]+)*|[zyx]+(test)+[mnb]";
-	reg = rz_regex_new(p_big, "e");
+	reg = rz_regex_new(p_big, RZ_REGEX_EXTENDED, 0);
 	mu_assert_true(exec_regex(reg, "z1abcde123z", &match), "Regex match failed");
-	mu_assert_eq(match.rm_so, 1, "Start of match is not 1");
-	mu_assert_eq(match.rm_eo, 7, "Start of match is not 7");
+	mu_assert_notnull(match, "match was not set");
+	mu_assert_eq(match->start, 1, "Start of match is not 1");
+	mu_assert_eq(match->len, 6, "Len of match is not 6");
 	mu_assert_true(exec_regex(reg, "ayztesttestb123z", &match), "Regex match failed");
-	mu_assert_eq(match.rm_so, 1, "Start of match is not 1");
-	mu_assert_eq(match.rm_eo, 12, "Start of match is not 11");
+	mu_assert_notnull(match, "match was not set");
+	mu_assert_eq(match->start, 1, "Start of match is not 1");
+	mu_assert_eq(match->len, 11, "Len of match is not 11");
 	rz_regex_free(reg);
 	mu_end;
 }
@@ -53,38 +110,66 @@ bool test_rz_reg_exec(void) {
 bool test_rz_regex_capture(void) {
 	char *str = "abcd PrefixHello42s xyz";
 
-	RzRegex *re = rz_regex_new("[a-zA-Z]*(H[a-z]+)([0-9]*)s", "e");
+	RzRegex *re = rz_regex_new("[a-zA-Z]*(H[a-z]+)([0-9]*)s", RZ_REGEX_EXTENDED, 0);
 	mu_assert_notnull(re, "regex_new");
 
-	RzRegexMatch groups[4];
-	int r = rz_regex_exec(re, str, RZ_ARRAY_SIZE(groups), groups, 0);
-	mu_assert_eq(r, 0, "regex_exec");
+	RzPVector *matches = rz_regex_match_all_not_grouped(re, str, RZ_REGEX_ZERO_TERMINATED, 0, RZ_REGEX_DEFAULT);
+	mu_assert_true(matches && !rz_pvector_empty(matches), "Regex match failed");
+	mu_assert_eq(rz_pvector_len(matches), 3, "Regex match count failed.");
 
-	mu_assert_eq(groups[0].rm_so, 5, "full match start");
-	mu_assert_eq(groups[0].rm_eo, 19, "full match end");
-	char *s = rz_regex_match_extract(str, &groups[0]);
+	RzRegexMatch *match = rz_pvector_at(matches, 0);
+	mu_assert_eq(match->start, 5, "full match start");
+	mu_assert_eq(match->len, 14, "full match len");
+	char *s = rz_str_ndup(str + match->start, match->len);
 	mu_assert_streq_free(s, "PrefixHello42s", "full match extract");
 
-	mu_assert_eq(groups[1].rm_so, 11, "capture 1 start");
-	mu_assert_eq(groups[1].rm_eo, 16, "capture 1 end");
-	s = rz_regex_match_extract(str, &groups[1]);
+	match = rz_pvector_at(matches, 1);
+	mu_assert_eq(match->start, 11, "capture 1 start");
+	mu_assert_eq(match->len, 5, "capture 1 len");
+	s = rz_str_ndup(str + match->start, match->len);
 	mu_assert_streq_free(s, "Hello", "capture 1 extract");
 
-	mu_assert_eq(groups[2].rm_so, 16, "capture 2 start");
-	mu_assert_eq(groups[2].rm_eo, 18, "capture 2 end");
-	s = rz_regex_match_extract(str, &groups[2]);
+	match = rz_pvector_at(matches, 2);
+	mu_assert_eq(match->start, 16, "capture 2 start");
+	mu_assert_eq(match->len, 2, "capture 2 len");
+	s = rz_str_ndup(str + match->start, match->len);
 	mu_assert_streq_free(s, "42", "capture 2 extract");
 
-	mu_assert_eq(groups[3].rm_so, -1, "capture 3 start");
-	mu_assert_eq(groups[3].rm_eo, -1, "capture 3 end");
-	s = rz_regex_match_extract(str, &groups[3]);
-	mu_assert_null(s, "capture 3 extract");
-
 	rz_regex_free(re);
 	mu_end;
 }
 
+bool test_rz_regex_named_matches(void) {
+	RzRegex *reg = rz_regex_new("(?<proto>^\\w+)(:\\/\\/)(?<domain>\\w+)\\.(?<tdomain>\\w+)", RZ_REGEX_EXTENDED, 0);
+	mu_assert_notnull(reg, "Regex was NULL");
+	mu_assert_streq((char *)rz_regex_get_match_name(reg, 1), "proto", "proto name not set.");
+	mu_assert_streq((char *)rz_regex_get_match_name(reg, 3), "domain", "domain name not set.");
+	mu_assert_streq((char *)rz_regex_get_match_name(reg, 4), "tdomain", "tdomain name not set.");
+
+	RzPVector *matches = rz_regex_match_all_not_grouped(reg, "https://rizin.re", RZ_REGEX_ZERO_TERMINATED, 0, RZ_REGEX_DEFAULT);
+	mu_assert_true(matches && !rz_pvector_empty(matches), "Regex match failed");
+	mu_assert_eq(rz_pvector_len(matches), 5, "Regex match count failed.");
+
+	RzRegexMatch *match = rz_pvector_at(matches, 0);
+	mu_assert_streq((char *)rz_regex_get_match_name(reg, match->group_idx), "(null)", "(null) was not matched.");
+	match = rz_pvector_at(matches, 1);
+	mu_assert_streq((char *)rz_regex_get_match_name(reg, match->group_idx), "proto", "proto was not matched.");
+	match = rz_pvector_at(matches, 2);
+	mu_assert_streq((char *)rz_regex_get_match_name(reg, match->group_idx), "(null)", "(null) was not matched.");
+	match = rz_pvector_at(matches, 3);
+	mu_assert_streq((char *)rz_regex_get_match_name(reg, match->group_idx), "domain", "domain was not matched.");
+	match = rz_pvector_at(matches, 4);
+	mu_assert_streq((char *)rz_regex_get_match_name(reg, match->group_idx), "tdomain", "tdomain was not matched.");
+
+	rz_regex_free(reg);
+	mu_end;
+}
+
 int main() {
+	mu_run_test(test_rz_regex_all_match);
+	mu_run_test(test_rz_regex_extend_space);
 	mu_run_test(test_rz_reg_exec);
 	mu_run_test(test_rz_regex_capture);
+	mu_run_test(test_rz_regex_all_to_str);
+	mu_run_test(test_rz_regex_named_matches);
 }
diff --git a/test/unit/test_str.c b/test/unit/test_str.c
index c412b5129d5..c33c382013c 100644
--- a/test/unit/test_str.c
+++ b/test/unit/test_str.c
@@ -212,7 +212,7 @@ bool test_rz_str_split_list(void) {
 	rz_list_free(l);
 
 	char s1[] = "Hello  World\tAnd \t Everyone";
-	RzList *l1 = rz_str_split_duplist_n_regex(s1, "[[:blank:]]+", 0, false);
+	RzList *l1 = rz_str_split_duplist_n_regex(s1, "\\s+", 0, false);
 	mu_assert_eq(rz_list_length(l1), 4, "string has been split in 4 items");
 	mu_assert_streq(rz_list_get_n(l1, 0), "Hello", "first item");
 	mu_assert_streq(rz_list_get_n(l1, 1), "World", "second item");
@@ -228,7 +228,7 @@ bool test_rz_str_split_list(void) {
 	rz_list_free(l2);
 
 	char s3[] = "Hello  World\tAnd \t Everyone\t";
-	RzList *l3 = rz_str_split_list_regex(s3, "[[:blank:]]+", 0);
+	RzList *l3 = rz_str_split_list_regex(s3, "\\s+", 0);
 	mu_assert_eq(rz_list_length(l3), 4, "string has been split in 4 items");
 	mu_assert_streq(rz_list_get_n(l3, 0), "Hello", "first item");
 	mu_assert_streq(rz_list_get_n(l3, 1), "World", "second item");