From 639f391b3f9e15a8f1f47dc57e414ba1e5f7397c Mon Sep 17 00:00:00 2001 From: Anil Mahtani <929854+Anilm3@users.noreply.github.com> Date: Mon, 4 Mar 2024 16:20:02 +0000 Subject: [PATCH] Minor improvements --- src/uri_utils.cpp | 121 ++++++++++++++++++--------------------- tests/ip_test.cpp | 57 +++++++++++++----- tests/uri_utils_test.cpp | 119 ++++++++++++++++++++++++++------------ 3 files changed, 178 insertions(+), 119 deletions(-) diff --git a/src/uri_utils.cpp b/src/uri_utils.cpp index 10a53f8f0..9b72923a3 100644 --- a/src/uri_utils.cpp +++ b/src/uri_utils.cpp @@ -73,36 +73,33 @@ enum class token_type { }; constexpr const auto &npos = std::string_view::npos; -inline bool isunreserved(char c) +inline bool is_unreserved(char c) { return ddwaf::isalnum(c) || c == '-' || c == '.' || c == '_' || c == '~'; } -inline bool issubdelim(char c) +inline bool is_subdelim(char c) { return c == '!' || c == '$' || c == '&' || c == '\'' || c == '(' || c == ')' || c == '*' || c == '*' || c == '+' || c == ',' || c == ';' || c == '='; } -// TODO validate percent-encoding -// ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) +// TODO validate percent-encoding? inline bool is_scheme_char(char c) { return ddwaf::isalnum(c) || c == '.' || c == '-' || c == '+'; } -inline bool is_host_char(char c) { return isunreserved(c) || issubdelim(c) || c == '%'; } +inline bool is_host_char(char c) { return is_unreserved(c) || is_subdelim(c) || c == '%'; } inline bool is_path_char(char c) { - return isunreserved(c) || issubdelim(c) || c == '%' || c == ':' || c == '@'; + return is_unreserved(c) || is_subdelim(c) || c == '%' || c == ':' || c == '@'; } inline bool is_query_char(char c) { return is_path_char(c) || c == '/' || c == '?'; } inline bool is_frag_char(char c) { return is_path_char(c) || c == '/' || c == '?'; } -// *( unreserved / pct-encoded / sub-delims / ":" ) inline bool is_userinfo_char(char c) { - return isunreserved(c) || issubdelim(c) || c == ':' || c == '%'; + return is_unreserved(c) || is_subdelim(c) || c == ':' || c == '%'; } -// *( unreserved / pct-encoded / sub-delims ) -inline bool isregname(char c) { return isunreserved(c) || issubdelim(c) || c == '%'; } +inline bool is_regname_char(char c) { return is_unreserved(c) || is_subdelim(c) || c == '%'; } } // namespace @@ -112,23 +109,20 @@ std::optional uri_parse(std::string_view uri) decomposed.raw = uri; auto expected_token = token_type::scheme; - token_type lookahead_token; + auto lookahead_token = token_type::none; - std::size_t i = 0; - std::size_t token_begin = npos; + // Authority helpers + std::size_t authority_end = npos; + std::string_view authority_substr; - // Only used for the authority - std::size_t token_end = npos; - std::string_view token_substr; - - while (i < uri.size()) { + for (std::size_t i = 0; i < uri.size();) { // Dead man's switch auto current_token = expected_token; expected_token = token_type::none; switch (current_token) { case token_type::scheme: { - token_begin = i; + auto token_begin = i; if (!isalpha(uri[i++])) { // The URI is malformed as the first character must be alphabetic return std::nullopt; @@ -166,19 +160,11 @@ std::optional uri_parse(std::string_view uri) // Otherwise we expect a path (path-absolute, path-rootless, path-empty) expected_token = token_type::path_no_authority; } - token_begin = i; break; } case token_type::path_no_authority: { + auto token_begin = i; // The path can be empty but we wouldn't be here... - if (uri[i] == '/') { - // if the path starts with a forward-slash, the next character - // can't be another forward-slash, but rather a valid path char - if (++i < uri.size() && !is_path_char(uri[i])) { - return std::nullopt; - } - } - while (i < uri.size()) { const auto c = uri[i++]; if (!is_path_char(c) && c != '/') { @@ -187,20 +173,17 @@ std::optional uri_parse(std::string_view uri) } decomposed.path_index = token_begin; - if (token_begin == i) { - decomposed.path = ""; // Empty path - } else { - decomposed.path = uri.substr(token_begin, i - token_begin); - } + decomposed.path = uri.substr(token_begin, i - token_begin); // We're done, nothing else to parse return decomposed; } case token_type::authority: { - token_end = uri.find_first_of("/?#", i); - if (token_end != npos) { + auto token_begin = i; + authority_end = uri.find_first_of("/?#", i); + if (authority_end != npos) { // The authority is empty - const auto c = uri[token_end]; + const auto c = uri[authority_end]; if (c == '/') { lookahead_token = token_type::path; } else if (c == '?') { @@ -209,21 +192,21 @@ std::optional uri_parse(std::string_view uri) lookahead_token = token_type::fragment; } } else { - token_end = uri.size(); + authority_end = uri.size(); } - if (token_end > i) { + if (authority_end > i) { // The substring starts on 0 to ensure that indices are correct - token_substr = uri.substr(0, token_end); - if (token_substr.find('@', i) != npos) { + authority_substr = uri.substr(0, authority_end); + if (authority_substr.find('@', i) != npos) { expected_token = token_type::userinfo; } else { expected_token = token_type::host; } decomposed.authority.index = token_begin; - decomposed.authority.raw = uri.substr(token_begin, token_end - token_begin); - decomposed.scheme_and_authority = uri.substr(0, token_end); + decomposed.authority.raw = uri.substr(token_begin, authority_end - token_begin); + decomposed.scheme_and_authority = uri.substr(0, authority_end); } else { expected_token = lookahead_token; } @@ -231,10 +214,9 @@ std::optional uri_parse(std::string_view uri) break; } case token_type::userinfo: { + auto token_begin = i; // Find any unexpected characters, technically the ':' is valid and the // password is deprecated so allow one or more instances of it. - // ALPHA / DIGIT / "-" / "." / "_" / "~" / "!" / "$" / "&" / "'" / - // "(" / ")" / "*" / "+" / "," / ";" / "=" / "%" while (i < uri.size()) { const auto c = uri[i++]; @@ -244,7 +226,7 @@ std::optional uri_parse(std::string_view uri) decomposed.authority.userinfo = uri.substr(token_begin, i - token_begin - 1); token_begin = i; - if (i == token_end) { + if (i == authority_end) { expected_token = lookahead_token; } else { expected_token = token_type::host; @@ -265,11 +247,11 @@ std::optional uri_parse(std::string_view uri) if (uri[i] == '[') { expected_token = token_type::ipv6address; } else if (uri[i] == ':') { // Empty host - token_begin = ++i; + ++i; expected_token = token_type::port; } else if (is_host_char(uri[i])) { expected_token = token_type::regname_or_ipv4address; - } else if (token_end != uri.size()) { + } else if (authority_end != uri.size()) { expected_token = lookahead_token; } else { // Not a valid character, malformed @@ -278,13 +260,14 @@ std::optional uri_parse(std::string_view uri) break; } case token_type::regname_or_ipv4address: { + auto token_begin = i; // Reg name or IPv4 host - for (; i < token_end; ++i) { + for (; i < authority_end; ++i) { const auto c = uri[i]; if (c == ':') { /* Port */ break; } - if (!isregname(c)) { + if (!is_regname_char(c)) { // Unexpected character, find the port and exit return std::nullopt; } @@ -313,6 +296,7 @@ std::optional uri_parse(std::string_view uri) break; } case token_type::ipv6address: { + auto token_begin = i; // Validate if this is an IPv6 host bool end_found = false; for (i += 1; i < uri.size(); ++i) { @@ -344,7 +328,7 @@ std::optional uri_parse(std::string_view uri) decomposed.authority.ipv6_host = true; token_begin = ++i; - if (token_begin == token_end) { + if (token_begin == authority_end) { // Keep the next token as it can be the beginning of the // path which has to be kept expected_token = lookahead_token; @@ -359,15 +343,15 @@ std::optional uri_parse(std::string_view uri) break; } case token_type::port: { - token_begin = i; - for (; i < token_end; ++i) { + auto token_begin = i; + for (; i < authority_end; ++i) { if (!ddwaf::isdigit(uri[i])) { return std::nullopt; } } decomposed.authority.port = uri.substr(token_begin, i - token_begin); - if (token_end == uri.size()) { + if (authority_end == uri.size()) { return decomposed; } @@ -377,7 +361,7 @@ std::optional uri_parse(std::string_view uri) break; } case token_type::path: { - token_begin = i; + auto token_begin = i; for (; i < uri.size(); ++i) { const auto c = uri[i]; if (c == '?') { @@ -401,12 +385,12 @@ std::optional uri_parse(std::string_view uri) return decomposed; } - decomposed.path = decomposed.raw.substr(token_begin, i - token_end); + decomposed.path = decomposed.raw.substr(token_begin, i - token_begin); break; } case token_type::query: { // Skip '?' - token_begin = ++i; + auto token_begin = ++i; for (; i < uri.size(); ++i) { const auto c = uri[i]; if (c == '#') { @@ -419,19 +403,21 @@ std::optional uri_parse(std::string_view uri) } } - // TODO check that i != token_begin? - decomposed.query_index = token_begin; - if (i >= uri.size()) { - decomposed.query = decomposed.raw.substr(token_begin); - return decomposed; - } + // Ignore empty query + if (i > token_begin) { + decomposed.query_index = token_begin; + if (i >= uri.size()) { + decomposed.query = decomposed.raw.substr(token_begin); + return decomposed; + } - decomposed.query = decomposed.raw.substr(token_begin, i - token_begin); + decomposed.query = decomposed.raw.substr(token_begin, i - token_begin); + } break; } case token_type::fragment: { // Skip '#' - token_begin = ++i; + auto token_begin = ++i; for (; i < uri.size(); ++i) { const auto c = uri[i]; if (!is_frag_char(c)) { @@ -439,8 +425,11 @@ std::optional uri_parse(std::string_view uri) } } - decomposed.fragment_index = token_begin; - decomposed.fragment = uri.substr(token_begin); + // Ignore empty fragment + if (i > token_begin) { + decomposed.fragment_index = token_begin; + decomposed.fragment = uri.substr(token_begin); + } return decomposed; } case token_type::none: diff --git a/tests/ip_test.cpp b/tests/ip_test.cpp index 6b27ca569..a6ef3c751 100644 --- a/tests/ip_test.cpp +++ b/tests/ip_test.cpp @@ -11,13 +11,25 @@ namespace { TEST(TestIP, ParsingIPv4) { - ddwaf::ipaddr ip{}; - EXPECT_TRUE(ddwaf::parse_ip("1.2.3.4", ip)); - EXPECT_EQ(ip.type, ddwaf::ipaddr::address_family::ipv4); - EXPECT_EQ(ip.data[0], 1); - EXPECT_EQ(ip.data[1], 2); - EXPECT_EQ(ip.data[2], 3); - EXPECT_EQ(ip.data[3], 4); + { + ddwaf::ipaddr ip{}; + EXPECT_TRUE(ddwaf::parse_ip("1.2.3.4", ip)); + EXPECT_EQ(ip.type, ddwaf::ipaddr::address_family::ipv4); + EXPECT_EQ(ip.data[0], 1); + EXPECT_EQ(ip.data[1], 2); + EXPECT_EQ(ip.data[2], 3); + EXPECT_EQ(ip.data[3], 4); + } + + { + ddwaf::ipaddr ip{}; + EXPECT_TRUE(ddwaf::parse_ipv4("1.2.3.4", ip)); + EXPECT_EQ(ip.type, ddwaf::ipaddr::address_family::ipv4); + EXPECT_EQ(ip.data[0], 1); + EXPECT_EQ(ip.data[1], 2); + EXPECT_EQ(ip.data[2], 3); + EXPECT_EQ(ip.data[3], 4); + } } TEST(TestIP, ParsingIPv4Class) @@ -30,16 +42,31 @@ TEST(TestIP, ParsingIPv4Class) TEST(TestIP, ParsingIPv6) { - ddwaf::ipaddr ip{}; + { + ddwaf::ipaddr ip{}; - EXPECT_TRUE(ddwaf::parse_ip("abcd::ef01", ip)); - EXPECT_EQ(ip.type, ddwaf::ipaddr::address_family::ipv6); - EXPECT_EQ(ip.data[0], 0xab); - EXPECT_EQ(ip.data[1], 0xcd); - for (int i = 2; i < 14; ++i) { EXPECT_EQ(ip.data[i], 0); } + EXPECT_TRUE(ddwaf::parse_ip("abcd::ef01", ip)); + EXPECT_EQ(ip.type, ddwaf::ipaddr::address_family::ipv6); + EXPECT_EQ(ip.data[0], 0xab); + EXPECT_EQ(ip.data[1], 0xcd); + for (int i = 2; i < 14; ++i) { EXPECT_EQ(ip.data[i], 0); } + + EXPECT_EQ(ip.data[14], 0xef); + EXPECT_EQ(ip.data[15], 0x01); + } - EXPECT_EQ(ip.data[14], 0xef); - EXPECT_EQ(ip.data[15], 0x01); + { + ddwaf::ipaddr ip{}; + + EXPECT_TRUE(ddwaf::parse_ipv6("abcd::ef01", ip)); + EXPECT_EQ(ip.type, ddwaf::ipaddr::address_family::ipv6); + EXPECT_EQ(ip.data[0], 0xab); + EXPECT_EQ(ip.data[1], 0xcd); + for (int i = 2; i < 14; ++i) { EXPECT_EQ(ip.data[i], 0); } + + EXPECT_EQ(ip.data[14], 0xef); + EXPECT_EQ(ip.data[15], 0x01); + } } TEST(TestIP, ParsingIPv4MappedIPv6) diff --git a/tests/uri_utils_test.cpp b/tests/uri_utils_test.cpp index 6c748743f..ab768900d 100644 --- a/tests/uri_utils_test.cpp +++ b/tests/uri_utils_test.cpp @@ -11,39 +11,48 @@ using namespace std::literals; namespace { -TEST(TestURI, Complete) +TEST(TestURI, Scheme) { { - auto uri = ddwaf::uri_parse( - "http+s.i-a://user@hello.com:1929/path/to/nowhere?query=none#fragment"); - + auto uri = ddwaf::uri_parse("http://"); ASSERT_TRUE(uri); - EXPECT_STRV(uri->scheme, "http+s.i-a"); - EXPECT_STRV(uri->authority.host, "hello.com"); - EXPECT_STRV(uri->authority.userinfo, "user"); - EXPECT_STRV(uri->authority.port, "1929"); - EXPECT_STRV(uri->authority.raw, "user@hello.com:1929"); - EXPECT_STRV(uri->scheme_and_authority, "http+s.i-a://user@hello.com:1929"); - EXPECT_STRV(uri->path, "/path/to/nowhere"); - EXPECT_STRV( - uri->raw, "http+s.i-a://user@hello.com:1929/path/to/nowhere?query=none#fragment"); + EXPECT_STRV(uri->scheme, "http"); + EXPECT_FALSE(uri->authority.ipv6_host); + EXPECT_TRUE(uri->authority.host.empty()); + EXPECT_TRUE(uri->authority.userinfo.empty()); + EXPECT_TRUE(uri->authority.port.empty()); + EXPECT_TRUE(uri->authority.raw.empty()); + EXPECT_TRUE(uri->scheme_and_authority.empty()); + EXPECT_TRUE(uri->path.empty()); + EXPECT_TRUE(uri->query.empty()); + EXPECT_TRUE(uri->fragment.empty()); } { - auto uri = ddwaf::uri_parse("s://u@h:1/p?q#f"); - + auto uri = ddwaf::uri_parse("http:"); ASSERT_TRUE(uri); - EXPECT_STRV(uri->scheme, "s"); - EXPECT_STRV(uri->authority.host, "h"); - EXPECT_STRV(uri->authority.userinfo, "u"); - EXPECT_STRV(uri->authority.port, "1"); - EXPECT_STRV(uri->scheme_and_authority, "s://u@h:1"); - EXPECT_STRV(uri->path, "/p"); - EXPECT_STRV(uri->authority.raw, "u@h:1"); + EXPECT_STRV(uri->scheme, "http"); + EXPECT_FALSE(uri->authority.ipv6_host); + EXPECT_TRUE(uri->authority.host.empty()); + EXPECT_TRUE(uri->authority.userinfo.empty()); + EXPECT_TRUE(uri->authority.port.empty()); + EXPECT_TRUE(uri->authority.raw.empty()); + EXPECT_TRUE(uri->scheme_and_authority.empty()); + EXPECT_TRUE(uri->path.empty()); + EXPECT_TRUE(uri->query.empty()); + EXPECT_TRUE(uri->fragment.empty()); } } -TEST(TestURI, FileLocation) +TEST(TestURI, MalformedScheme) +{ + EXPECT_FALSE(ddwaf::uri_parse("h@@:path")); + EXPECT_FALSE(ddwaf::uri_parse("hhttp,:")); + EXPECT_FALSE(ddwaf::uri_parse("http//")); + EXPECT_FALSE(ddwaf::uri_parse("url.com")); +} + +TEST(TestURI, SchemeAndPath) { { auto uri = ddwaf::uri_parse("file:///usr/lib/libddwaf.so"); @@ -54,6 +63,7 @@ TEST(TestURI, FileLocation) EXPECT_TRUE(uri->authority.userinfo.empty()); EXPECT_TRUE(uri->authority.port.empty()); EXPECT_STRV(uri->path, "/usr/lib/libddwaf.so"); + EXPECT_EQ(uri->path_index, 7); } { @@ -65,6 +75,7 @@ TEST(TestURI, FileLocation) EXPECT_TRUE(uri->authority.userinfo.empty()); EXPECT_TRUE(uri->authority.port.empty()); EXPECT_STRV(uri->path, "/usr/lib/libddwaf.so"); + EXPECT_EQ(uri->path_index, 5); } { @@ -76,9 +87,28 @@ TEST(TestURI, FileLocation) EXPECT_TRUE(uri->authority.userinfo.empty()); EXPECT_TRUE(uri->authority.port.empty()); EXPECT_STRV(uri->path, "/../lib/libddwaf.so"); + EXPECT_EQ(uri->path_index, 5); + } + { + auto uri = ddwaf::uri_parse("file:../lib/libddwaf.so"); + ASSERT_TRUE(uri); + EXPECT_STRV(uri->scheme, "file"); + EXPECT_FALSE(uri->authority.ipv6_host); + EXPECT_TRUE(uri->authority.host.empty()); + EXPECT_TRUE(uri->authority.userinfo.empty()); + EXPECT_TRUE(uri->authority.port.empty()); + EXPECT_STRV(uri->path, "../lib/libddwaf.so"); + EXPECT_EQ(uri->path_index, 5); } } +TEST(TestURI, SchemeInvalidPath) +{ + EXPECT_FALSE(ddwaf::uri_parse("file:[][][]")); + EXPECT_FALSE(ddwaf::uri_parse("file:?query")); + EXPECT_FALSE(ddwaf::uri_parse("file:#fragment")); +} + TEST(TestURI, SchemeHost) { { @@ -304,13 +334,6 @@ TEST(TestURI, EmptyAuthority) } } -TEST(TestURI, MalformedScheme) -{ - ASSERT_FALSE(ddwaf::uri_parse("h@@:path")); - ASSERT_FALSE(ddwaf::uri_parse("hhttp,:")); - ASSERT_FALSE(ddwaf::uri_parse("http//")); -} - TEST(TestURI, MalformedAuthority) { ASSERT_FALSE(ddwaf::uri_parse("http://host:::asdnsk")); @@ -323,16 +346,36 @@ TEST(TestURI, MalformedAuthority) // ASSERT_FALSE(ddwaf::uri_parse("http://something@:123")); } -TEST(TestURI, NoAuthorityOrPath) +TEST(TestURI, Complete) { - /* auto uri = ddwaf::uri_parse("http://");*/ - /*ASSERT_FALSE(uri);*/ -} + { + auto uri = ddwaf::uri_parse( + "http+s.i-a://user@hello.com:1929/path/to/nowhere?query=none#fragment"); -TEST(TestURI, NoScheme) -{ - auto uri = ddwaf::uri_parse("url.com"); - EXPECT_FALSE(uri); + ASSERT_TRUE(uri); + EXPECT_STRV(uri->scheme, "http+s.i-a"); + EXPECT_STRV(uri->authority.host, "hello.com"); + EXPECT_STRV(uri->authority.userinfo, "user"); + EXPECT_STRV(uri->authority.port, "1929"); + EXPECT_STRV(uri->authority.raw, "user@hello.com:1929"); + EXPECT_STRV(uri->scheme_and_authority, "http+s.i-a://user@hello.com:1929"); + EXPECT_STRV(uri->path, "/path/to/nowhere"); + EXPECT_STRV( + uri->raw, "http+s.i-a://user@hello.com:1929/path/to/nowhere?query=none#fragment"); + } + + { + auto uri = ddwaf::uri_parse("s://u@h:1/p?q#f"); + + ASSERT_TRUE(uri); + EXPECT_STRV(uri->scheme, "s"); + EXPECT_STRV(uri->authority.host, "h"); + EXPECT_STRV(uri->authority.userinfo, "u"); + EXPECT_STRV(uri->authority.port, "1"); + EXPECT_STRV(uri->scheme_and_authority, "s://u@h:1"); + EXPECT_STRV(uri->path, "/p"); + EXPECT_STRV(uri->authority.raw, "u@h:1"); + } } } // namespace