Skip to content

Commit

Permalink
Add text_len parameter to all regex functions.
Browse files Browse the repository at this point in the history
This is necessary because we want to allow searching over buffers of arbitrary length.
  • Loading branch information
Rot127 committed Jan 31, 2024
1 parent 619030e commit edce64e
Show file tree
Hide file tree
Showing 18 changed files with 56 additions and 31 deletions.
2 changes: 1 addition & 1 deletion binrz/rz-test/run.c
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ RZ_API RzSubprocessOutput *rz_test_run_cmd_test(RzTestRunConfig *config, RzCmdTe

RZ_API bool rz_test_cmp_cmd_output(const char *output, const char *expect, const char *regexp) {
if (regexp) {
RzStrBuf *match_str = rz_regex_full_match_str(regexp, output, RZ_REGEX_EXTENDED, RZ_REGEX_DEFAULT, "\0");
RzStrBuf *match_str = rz_regex_full_match_str(regexp, output, RZ_REGEX_ZERO_TERMINATED, RZ_REGEX_EXTENDED, RZ_REGEX_DEFAULT, "\0");
bool equal = RZ_STR_EQ(expect, rz_strbuf_get(match_str));
rz_strbuf_free(match_str);
return equal;
Expand Down
2 changes: 1 addition & 1 deletion binrz/rz-test/rz-test.c
Original file line number Diff line number Diff line change
Expand Up @@ -743,7 +743,7 @@ static void print_diff(const char *actual, const char *expected, const char *reg
const char *output = actual;

if (regexp) {
RzStrBuf *match_str = rz_regex_full_match_str(regexp, actual, RZ_REGEX_EXTENDED, RZ_REGEX_DEFAULT, "\0");
RzStrBuf *match_str = rz_regex_full_match_str(regexp, actual, RZ_REGEX_ZERO_TERMINATED, RZ_REGEX_EXTENDED, RZ_REGEX_DEFAULT, "\0");
output = rz_strbuf_drain(match_str);
}

Expand Down
2 changes: 1 addition & 1 deletion librz/asm/asm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1583,7 +1583,7 @@ RZ_API RZ_OWN RzAsmTokenString *rz_asm_tokenize_asm_regex(RZ_BORROW RzStrBuf *as
}

// Search for token pattern.
RzPVector *match_sets = rz_regex_match_all(pattern->regex, asm_str, 0, RZ_REGEX_DEFAULT);
RzPVector *match_sets = rz_regex_match_all(pattern->regex, asm_str, RZ_REGEX_ZERO_TERMINATED, 0, RZ_REGEX_DEFAULT);
void **grouped_match;
rz_pvector_foreach (match_sets, grouped_match) {
if (rz_pvector_empty(*grouped_match)) {
Expand Down
2 changes: 1 addition & 1 deletion librz/cons/less.c
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ RZ_API int rz_cons_less_str(const char *str, const char *exitkeys) {
break;
}
/* find all occurrences */
RzPVector *matches = rz_regex_match_all_not_grouped(rx, str, 0, RZ_REGEX_DEFAULT);
RzPVector *matches = rz_regex_match_all_not_grouped(rx, str, RZ_REGEX_ZERO_TERMINATED, 0, RZ_REGEX_DEFAULT);
if (rz_pvector_empty(matches)) {
rz_pvector_free(matches);
break;
Expand Down
2 changes: 1 addition & 1 deletion librz/core/casm.c
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ RZ_API RzList /*<RzCoreAsmHit *>*/ *rz_core_asm_strsearch(RzCore *core, const ch
matches = strstr(opst, tokens[matchcount]) != NULL;
} else {
rx = rz_regex_new(tokens[matchcount], RZ_REGEX_EXTENDED, 0);
RzPVector *tmp_m = rz_regex_match_first(rx, opst, 0, RZ_REGEX_DEFAULT);
RzPVector *tmp_m = rz_regex_match_first(rx, opst, RZ_REGEX_ZERO_TERMINATED, 0, RZ_REGEX_DEFAULT);
matches = !rz_pvector_empty(tmp_m) || tmp_m != NULL;
rz_regex_free(rx);
rz_pvector_free(tmp_m);
Expand Down
2 changes: 1 addition & 1 deletion librz/core/cmd/cmd_debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ static bool step_until_inst(RzCore *core, const char *instr, bool regex) {
if (ret > 0) {
const char *buf_asm = rz_asm_op_get_asm(&asmop);
if (regex) {
if (rz_regex_contains(instr, buf_asm, RZ_REGEX_EXTENDED, RZ_REGEX_DEFAULT)) {
if (rz_regex_contains(instr, buf_asm, RZ_REGEX_ZERO_TERMINATED, RZ_REGEX_EXTENDED, RZ_REGEX_DEFAULT)) {
RZ_LOG_ERROR("core: esil: stop.\n");
break;
}
Expand Down
2 changes: 1 addition & 1 deletion librz/core/cmd/cmd_search.c
Original file line number Diff line number Diff line change
Expand Up @@ -1081,7 +1081,7 @@ static RzList /*<RzCoreAsmHit *>*/ *construct_rop_gadget(RzCore *core, ut64 addr
idx += opsz;
addr += opsz;
if (rx) {
grep_find = rz_regex_contains(rx, opst, RZ_REGEX_EXTENDED, RZ_REGEX_DEFAULT);
grep_find = rz_regex_contains(rx, opst, RZ_REGEX_ZERO_TERMINATED, RZ_REGEX_EXTENDED, RZ_REGEX_DEFAULT);
search_hit = (end && grep && grep_find);
} else {
search_hit = (end && grep && strstr(opst, grep_str));
Expand Down
2 changes: 1 addition & 1 deletion librz/core/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1382,7 +1382,7 @@ static bool find_e_opts(RzCore *core, RzLineCompletion *completion, RzLineBuffer
RzRegex *rx = rz_regex_new(pattern, RZ_REGEX_EXTENDED, 0);
bool ret = false;

RzPVector *matches = rz_regex_match_all_not_grouped(rx, buf->data, 0, RZ_REGEX_DEFAULT);
RzPVector *matches = rz_regex_match_all_not_grouped(rx, buf->data, buf->length, 0, RZ_REGEX_DEFAULT);
if (!matches || rz_pvector_empty(matches) || rz_pvector_len(matches) < 2) {
goto out;
}
Expand Down
15 changes: 12 additions & 3 deletions librz/include/rz_regex.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@
#define RZ_REGEX_PARTIAL_SOFT PCRE2_PARTIAL_SOFT
#define RZ_REGEX_PARTIAL_HARD PCRE2_PARTIAL_HARD

#define RZ_REGEX_UNSET PCRE2_UNSET
#define RZ_REGEX_UNSET PCRE2_UNSET
#define RZ_REGEX_ZERO_TERMINATED PCRE2_ZERO_TERMINATED

typedef int RzRegexStatus; ///< An status number returned by the regex API.
typedef PCRE2_SIZE RzRegexSize; ///< Size of a text or regex. This is the size measured in code width. For UTF-8: bytes.
Expand Down Expand Up @@ -66,25 +67,33 @@ RZ_API void rz_regex_free(RzRegex *regex);
RZ_API void rz_regex_error_msg(RzRegexStatus errcode, RZ_OUT char *errbuf, RzRegexSize errbuf_size);
RZ_API const ut8 *rz_regex_get_match_name(const RzRegex *regex, ut32 name_idx);
RZ_API RzRegexStatus rz_regex_match(const RzRegex *regex, RZ_NONNULL const char *text,
RzRegexSize text_size,
RzRegexSize text_offset,
RzRegexFlags mflags,
RZ_NULLABLE RZ_OUT RzRegexMatchData *mdata);
RZ_API RZ_OWN RzPVector /*<RzRegexMatch>*/ *rz_regex_match_all_not_grouped(
const RzRegex *regex,
RZ_NONNULL const char *text,
RzRegexSize text_size,
RzRegexSize text_offset,
RzRegexFlags mflags);
RZ_API RZ_OWN RzPVector /*<RzRegexMatch>*/ *rz_regex_match_first(
const RzRegex *regex,
RZ_NONNULL const char *text,
RzRegexSize text_size,
RzRegexSize text_offset,
RzRegexFlags mflags);
RZ_API RZ_OWN RzPVector /*<RzVector<RzRegexMatch>>*/ *rz_regex_match_all(
const RzRegex *regex,
RZ_NONNULL const char *text,
RzRegexSize text_size,
RzRegexSize text_offset,
RzRegexFlags mflags);
RZ_API bool rz_regex_contains(const char *pattern, const char *text, RzRegexFlags cflags, RzRegexFlags mflags);
RZ_API RZ_OWN RzStrBuf *rz_regex_full_match_str(const char *pattern, const char *text, RzRegexFlags cflags, RzRegexFlags mflags, RZ_NONNULL const char *separator);
RZ_API bool rz_regex_contains(const char *pattern, const char *text,
RzRegexSize text_size,
RzRegexFlags cflags, RzRegexFlags mflags);
RZ_API RZ_OWN RzStrBuf *rz_regex_full_match_str(const char *pattern, const char *text,
RzRegexSize text_size,
RzRegexFlags cflags, RzRegexFlags mflags, RZ_NONNULL const char *separator);

#endif /* !_REGEX_H_ */
4 changes: 2 additions & 2 deletions librz/magic/softmagic.c
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ static int check_fmt(RzMagic *ms, struct rz_magic *m) {
if (!re) {
return -1;
}
RzRegexStatus rc = rz_regex_match(re, RZ_MAGIC_DESC, 0, RZ_REGEX_DEFAULT, NULL);
RzRegexStatus rc = rz_regex_match(re, RZ_MAGIC_DESC, RZ_REGEX_ZERO_TERMINATED, 0, RZ_REGEX_DEFAULT, NULL);
rz_regex_free(re);
return rc > 0 ? 1 : 0;
}
Expand Down Expand Up @@ -1417,7 +1417,7 @@ static int magiccheck(RzMagic *ms, struct rz_magic *m) {
if (!rx) {
return -1;
}
RzPVector *matches = rz_regex_match_first(rx, (const char *)ms->search.s, 0, RZ_REGEX_DEFAULT);
RzPVector *matches = rz_regex_match_first(rx, (const char *)ms->search.s, RZ_REGEX_ZERO_TERMINATED, 0, RZ_REGEX_DEFAULT);
rz_regex_free(rx);
if (!matches) {
return -1;
Expand Down
2 changes: 1 addition & 1 deletion librz/parse/filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ static bool is_lea(const char *asm_str) {
if (!re) {
return false;
}
bool res = rz_regex_match(re, asm_str, 0, RZ_REGEX_DEFAULT, NULL) != RZ_REGEX_ERROR_NOMATCH;
bool res = rz_regex_match(re, asm_str, RZ_REGEX_ZERO_TERMINATED, 0, RZ_REGEX_DEFAULT, NULL) != RZ_REGEX_ERROR_NOMATCH;
rz_regex_free(re);
return res;
}
Expand Down
2 changes: 1 addition & 1 deletion librz/parse/p/parse_arm_pseudo.c
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ static char *subvar_stack(RzParse *p, RzAnalysisOp *op, RZ_NULLABLE RzAnalysisFu
if (!var_re) {
return tstr;
}
RzPVector *matches = rz_regex_match_first(var_re, tstr, 0, RZ_REGEX_DEFAULT);
RzPVector *matches = rz_regex_match_first(var_re, tstr, RZ_REGEX_ZERO_TERMINATED, 0, RZ_REGEX_DEFAULT);
if (!matches || rz_pvector_empty(matches)) {
rz_regex_free(var_re);
rz_pvector_free(matches);
Expand Down
2 changes: 1 addition & 1 deletion librz/parse/p/parse_mips_pseudo.c
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ static char *subvar_stack(RzParse *p, RzAnalysisOp *op, RZ_NULLABLE RzAnalysisFu
if (!var_re) {
return tstr;
}
RzPVector *matches = rz_regex_match_first(var_re, tstr, 0, RZ_REGEX_DEFAULT);
RzPVector *matches = rz_regex_match_first(var_re, tstr, RZ_REGEX_ZERO_TERMINATED, 0, RZ_REGEX_DEFAULT);
if (!matches || rz_pvector_empty(matches)) {
rz_regex_free(var_re);
rz_pvector_free(matches);
Expand Down
2 changes: 1 addition & 1 deletion librz/parse/p/parse_x86_pseudo.c
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ static char *subvar_stack(RzParse *p, RzAnalysisOp *op, RZ_NULLABLE RzAnalysisFu
if (!var_re) {
return tstr;
}
RzPVector *matches = rz_regex_match_first(var_re, tstr, 0, RZ_REGEX_DEFAULT);
RzPVector *matches = rz_regex_match_first(var_re, tstr, RZ_REGEX_ZERO_TERMINATED, 0, RZ_REGEX_DEFAULT);
if (!matches || rz_pvector_empty(matches)) {
rz_regex_free(var_re);
rz_pvector_free(matches);
Expand Down
2 changes: 1 addition & 1 deletion librz/search/regexp.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ RZ_API int rz_search_regexp_update(RzSearch *s, ut64 from, const ut8 *buf, int l
return -1;
}

matches = rz_regex_match_all_not_grouped(compiled, (char *)buf, from, RZ_REGEX_DEFAULT);
matches = rz_regex_match_all_not_grouped(compiled, (char *)buf, len, from, RZ_REGEX_DEFAULT);
void **it;
rz_pvector_foreach (matches, it) {
RzRegexMatch *m = *it;
Expand Down
36 changes: 26 additions & 10 deletions librz/util/regex.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ void rz_regex_match_data_free(RZ_OWN RzRegexMatchData *match_data) {
}

RZ_API RzRegexStatus rz_regex_match(const RzRegex *regex, RZ_NONNULL const char *text,
RzRegexSize text_size,
RzRegexSize text_offset,
RzRegexFlags mflags,
RZ_NULLABLE RZ_OUT RzRegexMatchData *mdata) {
Expand All @@ -99,7 +100,7 @@ RZ_API RzRegexStatus rz_regex_match(const RzRegex *regex, RZ_NONNULL const char
one_time_match = true;
mdata = pcre2_match_data_create_from_pattern(regex, NULL);
}
RzRegexStatus rc = pcre2_match(regex, (PCRE2_SPTR)text, PCRE2_ZERO_TERMINATED, text_offset, mflags | PCRE2_NO_UTF_CHECK, mdata, NULL);
RzRegexStatus rc = pcre2_match(regex, (PCRE2_SPTR)text, text_size, text_offset, mflags | PCRE2_NO_UTF_CHECK, mdata, NULL);
if (one_time_match) {
pcre2_match_data_free(mdata);
}
Expand Down Expand Up @@ -152,15 +153,24 @@ RZ_API const ut8 *rz_regex_get_match_name(const RzRegex *regex, ut32 name_idx) {
/**
* \brief Finds the first match in a text and returns it as a pvector.
* The first element in the vector is always the whole match; the following elements are the matched groups, if any.
*
* \param regex The regex pattern to match.
* \param text_size The length of the buffer pointed to by \p text.
* Can be set to RZ_REGEX_ZERO_TERMINATED if the buffer is a zero terminated string.
* \param text_offset The offset into \p text from where the search starts.
* \param mflags Match flags.
*
* \return The matches as pvector. NULL in case of failure. Empty for no matches or regex related errors.
*/
RZ_API RZ_OWN RzPVector /*<RzRegexMatch>*/ *rz_regex_match_first(
const RzRegex *regex,
RZ_NONNULL const char *text,
RzRegexSize text_size,
RzRegexSize text_offset,
RzRegexFlags mflags) {
RzPVector *matches = rz_pvector_new(NULL);
RzRegexMatchData *mdata = pcre2_match_data_create_from_pattern(regex, NULL);
RzRegexStatus rc = pcre2_match(regex, (PCRE2_SPTR)text, PCRE2_ZERO_TERMINATED, text_offset, mflags | PCRE2_NO_UTF_CHECK, mdata, NULL);
RzRegexStatus rc = pcre2_match(regex, (PCRE2_SPTR)text, text_size, text_offset, mflags | PCRE2_NO_UTF_CHECK, mdata, NULL);

if (rc == PCRE2_ERROR_NOMATCH) {
// Nothing matched return empty vector.
Expand Down Expand Up @@ -232,12 +242,13 @@ RZ_API RZ_OWN RzPVector /*<RzRegexMatch>*/ *rz_regex_match_first(
RZ_API RZ_OWN RzPVector /*<RzRegexMatch>*/ *rz_regex_match_all_not_grouped(
const RzRegex *regex,
RZ_NONNULL const char *text,
RzRegexSize text_size,
RzRegexSize next_text_offset,
RzRegexFlags mflags) {
rz_return_val_if_fail(regex && text, NULL);

RzPVector *all_matches = rz_pvector_new(NULL);
RzPVector *matches = rz_regex_match_first(regex, text, next_text_offset, mflags);
RzPVector *matches = rz_regex_match_first(regex, text, text_size, next_text_offset, mflags);
while (matches && rz_pvector_len(matches) > 0) {
RzRegexMatch *whole_match = rz_pvector_head(matches);
next_text_offset = whole_match->start + whole_match->len;
Expand All @@ -249,7 +260,7 @@ RZ_API RZ_OWN RzPVector /*<RzRegexMatch>*/ *rz_regex_match_all_not_grouped(
}
rz_pvector_free(matches);
// Search again after the whole first match.
matches = rz_regex_match_first(regex, text, next_text_offset, mflags);
matches = rz_regex_match_first(regex, text, text_size, next_text_offset, mflags);
}

// Free last vector without matches.
Expand All @@ -263,18 +274,19 @@ RZ_API RZ_OWN RzPVector /*<RzRegexMatch>*/ *rz_regex_match_all_not_grouped(
RZ_API RZ_OWN RzPVector /*<RzVector<RzRegexMatch>>*/ *rz_regex_match_all(
const RzRegex *regex,
RZ_NONNULL const char *text,
RzRegexSize text_size,
RzRegexSize text_offset,
RzRegexFlags mflags) {
rz_return_val_if_fail(regex && text, NULL);

RzPVector *all_matches = rz_pvector_new((RzPVectorFree)rz_pvector_free);
RzPVector *matches = rz_regex_match_first(regex, text, text_offset, mflags);
RzPVector *matches = rz_regex_match_first(regex, text, text_size, text_offset, mflags);
while (matches && rz_pvector_len(matches) > 0) {
rz_pvector_push(all_matches, matches);
RzRegexMatch *m = rz_pvector_head(matches);
// Search again after the last match.
text_offset = m->start + m->len;
matches = rz_regex_match_first(regex, text, text_offset, mflags);
matches = rz_regex_match_first(regex, text, text_size, text_offset, mflags);
}

// Free last vector without matches.
Expand All @@ -285,12 +297,14 @@ RZ_API RZ_OWN RzPVector /*<RzVector<RzRegexMatch>>*/ *rz_regex_match_all(
/**
* \brief Checks if \p pattern can be found in \p text.
*/
RZ_API bool rz_regex_contains(const char *pattern, const char *text, RzRegexFlags cflags, RzRegexFlags mflags) {
RZ_API bool rz_regex_contains(const char *pattern, const char *text,
RzRegexSize text_size,
RzRegexFlags cflags, RzRegexFlags mflags) {
RzRegex *re = rz_regex_new(pattern, cflags, 0);
if (!re) {
return false;
}
RzPVector *matches = rz_regex_match_first(re, text, 0, mflags);
RzPVector *matches = rz_regex_match_first(re, text, text_size, 0, mflags);
bool found = matches != NULL && !rz_pvector_empty(matches);
rz_pvector_free(matches);
return found;
Expand All @@ -300,11 +314,13 @@ RZ_API bool rz_regex_contains(const char *pattern, const char *text, RzRegexFlag
* \brief Searches for a \p pattern in \p text and returns all matches as concatenated string.
*
*/
RZ_API RZ_OWN RzStrBuf *rz_regex_full_match_str(const char *pattern, const char *text, RzRegexFlags cflags, RzRegexFlags mflags, RZ_NONNULL const char *separator) {
RZ_API RZ_OWN RzStrBuf *rz_regex_full_match_str(const char *pattern, const char *text,
RzRegexSize text_size,
RzRegexFlags cflags, RzRegexFlags mflags, RZ_NONNULL const char *separator) {
rz_return_val_if_fail(pattern && text && separator, NULL);
RzRegex *re = rz_regex_new(pattern, cflags, 0);
RzStrBuf *sbuf = rz_strbuf_new("");
RzPVector *matches = rz_regex_match_all_not_grouped(re, text, 0, mflags);
RzPVector *matches = rz_regex_match_all_not_grouped(re, text, text_size, 0, mflags);
if (!matches || !sbuf) {
goto fini;
}
Expand Down
2 changes: 1 addition & 1 deletion librz/util/str.c
Original file line number Diff line number Diff line change
Expand Up @@ -3415,7 +3415,7 @@ static RzList /*<char *>*/ *str_split_list_common_regex(RZ_BORROW char *str, RZ_
int s = 0, e = 0;
int j = 0;
void **it;
RzPVector *matches = rz_regex_match_all(r, str, 0, RZ_REGEX_DEFAULT);
RzPVector *matches = rz_regex_match_all(r, str, RZ_REGEX_ZERO_TERMINATED, 0, RZ_REGEX_DEFAULT);
rz_pvector_foreach (matches, it) {
RzPVector *m = (RzPVector *)*it;
RzRegexMatch *group0 = rz_pvector_head(m);
Expand Down
4 changes: 2 additions & 2 deletions test/unit/test_regex.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#include <rz_vector.h>

bool exec_regex(RzRegex *regex, const char *str, RzRegexMatch **out) {
RzPVector *matches = rz_regex_match_all_not_grouped(regex, str, 0, RZ_REGEX_DEFAULT);
RzPVector *matches = rz_regex_match_all_not_grouped(regex, str, RZ_REGEX_ZERO_TERMINATED, 0, RZ_REGEX_DEFAULT);
if (!matches || rz_pvector_empty(matches)) {
return false;
}
Expand Down Expand Up @@ -91,7 +91,7 @@ bool test_rz_regex_capture(void) {
RzRegex *re = rz_regex_new("[a-zA-Z]*(H[a-z]+)([0-9]*)s", RZ_REGEX_EXTENDED, 0);
mu_assert_notnull(re, "regex_new");

RzPVector *matches = rz_regex_match_all_not_grouped(re, str, 0, RZ_REGEX_DEFAULT);
RzPVector *matches = rz_regex_match_all_not_grouped(re, str, RZ_REGEX_ZERO_TERMINATED, 0, RZ_REGEX_DEFAULT);
mu_assert_true(matches && !rz_pvector_empty(matches), "Regex match failed");
mu_assert_eq(rz_pvector_len(matches), 3, "Regex match count failed.");

Expand Down

0 comments on commit edce64e

Please sign in to comment.