Skip to content

Commit

Permalink
fix(strutil.h): ensure proper constexpr of string hashing (#3901)
Browse files Browse the repository at this point in the history
The underlying farmhash algorithm (which is the basis for
Strutil::strhash, and thus ustring's hash representation) turned out to
not be properly constexpr under certain circumstances and needed to be
expressed a different way to be fully constexpr on all platforms and for
all strings.

New implementation suggested by Alex Wells.

Also, make sure string_view ctr from char* can be constexpr, even for
C++14. This is related for the sake of the strhash calls that, when
taking string literals, actually choose the variety that accepts a
string_view.

Signed-off-by: Larry Gritz <[email protected]>
  • Loading branch information
lgritz committed Jul 1, 2023
1 parent dfa76c7 commit 8e8dab1
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 11 deletions.
72 changes: 64 additions & 8 deletions src/include/OpenImageIO/detail/farmhash.h
Original file line number Diff line number Diff line change
Expand Up @@ -239,21 +239,77 @@ STATIC_INLINE uint32_t Fetch32(const char *p) {

#else

template <typename T>
STATIC_INLINE T FetchType(const char *p) {
T result = 0;
for (size_t i = 0; i < sizeof(T); i++)
reinterpret_cast<char *>(&result)[i] = p[i];
return result;
#if defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L
// C++20 compiler with constexpr support std::bitcast
STATIC_INLINE uint64_t Fetch64(const char *p) {
struct BlockOfBytes {
char bytes[8];
};
BlockOfBytes bob{p[0],p[1],p[2],p[3],p[4],p[5],p[6],p[7]};
return std::bit_cast<uint64_t>(bob);
}
STATIC_INLINE uint32_t Fetch32(const char *p) {
struct BlockOfBytes {
char bytes[4];
};
BlockOfBytes bob{p[0],p[1],p[2],p[3]};
return std::bit_cast<uint32_t>(bob);
}

#else

// constexpr supported for C++14,17 versions that manually load bytes and
// bitshift into proper order for the unsigned integer.
// NOTE: bigendian untested
STATIC_INLINE uint64_t Fetch64(const char *p) {
return FetchType<uint64_t>(p);
// Favor letting uint8_t construction to handle
// signed to unsigned conversion vs. uint64_t(p[0]&0xff)
uint8_t b0 = p[0];
uint8_t b1 = p[1];
uint8_t b2 = p[2];
uint8_t b3 = p[3];
uint8_t b4 = p[4];
uint8_t b5 = p[5];
uint8_t b6 = p[6];
uint8_t b7 = p[7];
return littleendian() ?
(uint64_t(b0)) | // LSB
(uint64_t(b1) << 8) |
(uint64_t(b2) << 16) |
(uint64_t(b3) << 24) |
(uint64_t(b4) << 32) |
(uint64_t(b5) << 40) |
(uint64_t(b6) << 48) |
(uint64_t(b7) << 56) // MSB
: // Big Endian byte order
(uint64_t(b0) << 56) | // MSB
(uint64_t(b1) << 48) |
(uint64_t(b2) << 40) |
(uint64_t(b3) << 32) |
(uint64_t(b4) << 24) |
(uint64_t(b5) << 16) |
(uint64_t(b6) << 8) |
(uint64_t(b7)) ; // LSB
}

STATIC_INLINE uint32_t Fetch32(const char *p) {
return FetchType<uint32_t>(p);
uint8_t b0 = p[0];
uint8_t b1 = p[1];
uint8_t b2 = p[2];
uint8_t b3 = p[3];
return littleendian() ?
(uint32_t(b0)) | // LSB
(uint32_t(b1) << 8) |
(uint32_t(b2) << 16) |
(uint32_t(b3) << 24) // MSB
: // Big Endian byte order
(uint32_t(b0) << 24) | // MSB
(uint32_t(b1) << 16) |
(uint32_t(b2) << 8) |
(uint32_t(b3)); // LSB
}
#endif


// devices don't seem to have bswap_64() or bswap_32()
template<typename T>
Expand Down
19 changes: 16 additions & 3 deletions src/include/OpenImageIO/string_view.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,8 @@ class basic_string_view {
: m_chars(chars), m_len(len) { }

/// Construct from char*, use strlen to determine length.
OIIO_CONSTEXPR17 basic_string_view(const CharT* chars) noexcept
: m_chars(chars), m_len(chars ? Traits::length(chars) : 0) { }
// N.B. char_traits::length() is constexpr starting with C++17.
constexpr basic_string_view(const CharT* chars) noexcept
: m_chars(chars), m_len(chars ? cestrlen(chars) : 0) { }

/// Construct from std::string. Remember that a string_view doesn't have
/// its own copy of the characters, so don't use the `string_view` after
Expand Down Expand Up @@ -431,6 +430,20 @@ class basic_string_view {
return last;
}

// Guaranteed constexpr length of a C string
static constexpr size_t cestrlen(const charT* chars) {
#if OIIO_CPLUSPLUS_VERSION >= 17
return Traits::length(chars);
#else
if (chars == nullptr)
return 0;
size_t len = 0;
while (chars[len] != 0)
len++;
return len;
#endif
}

class traits_eq {
public:
constexpr traits_eq (CharT ch) noexcept : ch(ch) {}
Expand Down
3 changes: 3 additions & 0 deletions src/libutil/strutil_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,9 @@ test_hash()
OIIO_CHECK_EQUAL(strhash(std::string("foo")), 6150913649986995171);
OIIO_CHECK_EQUAL(strhash(string_view("foo")), 6150913649986995171);
OIIO_CHECK_EQUAL(strhash(""), 0); // empty string hashes to 0
// Check longer hash and ensure that it's really constexpr
constexpr size_t hash = Strutil::strhash("much longer string");
OIIO_CHECK_EQUAL(hash, 16257490369375554819ULL);
}


Expand Down

0 comments on commit 8e8dab1

Please sign in to comment.