Skip to content

Commit

Permalink
Fix: handle spaces and non-latin characters in filenames in loadtest …
Browse files Browse the repository at this point in the history
…apps. (#949)

Non-latin characters were a problem only for vkloadtests because only it
has a text overlay.
  • Loading branch information
MarkCallow authored Oct 10, 2024
1 parent e304057 commit 8aa500a
Show file tree
Hide file tree
Showing 5 changed files with 183 additions and 13 deletions.
129 changes: 123 additions & 6 deletions tests/loadtests/appfwSDL/VulkanAppSDL/vulkantextoverlay.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,113 @@
#define STB_FONT_HEIGHT STB_FONT_consolas_24_latin1_BITMAP_HEIGHT
#define STB_FIRST_CHAR STB_FONT_consolas_24_latin1_FIRST_CHAR
#define STB_NUM_CHARS STB_FONT_consolas_24_latin1_NUM_CHARS
#define STB_MISSING_GLPYH 0x80 // Actually a control character.

// Max. number of chars the text overlay buffer can hold
#define MAX_CHAR_COUNT 1024


/**
* @internal
* @brief Given the lead byte of a UTF-8 sequence returns the expected length of the codepoint
* @param[in] leadByte The lead byte of a UTF-8 sequence
* @return The expected length of the codepoint */
[[nodiscard]] constexpr inline int sequenceLength(uint8_t leadByte) noexcept {
if ((leadByte & 0b1000'0000u) == 0b0000'0000u)
return 1;
if ((leadByte & 0b1110'0000u) == 0b1100'0000u)
return 2;
if ((leadByte & 0b1111'0000u) == 0b1110'0000u)
return 3;
if ((leadByte & 0b1111'1000u) == 0b1111'0000u)
return 4;

return 0;
}

/**
* @internal
* @brief Checks if the codepoint was coded as a longer than required sequence
* @param[in] codepoint The unicode codepoint
* @param[in] length The UTF-8 sequence length
* @return True if the sequence length was inappropriate for the given codepoint */
[[nodiscard]] constexpr inline bool isOverlongSequence(uint32_t codepoint, int length) noexcept {
if (codepoint < 0x80)
return length != 1;
else if (codepoint < 0x800)
return length != 2;
else if (codepoint < 0x10000)
return length != 3;
else
return false;
}

/**
* @internal
* @brief Checks if the codepoint is valid
* @param[in] codepoint The unicode codepoint
* @return True if the codepoint is a valid unicode codepoint */
[[nodiscard]] constexpr inline bool isCodepointValid(uint32_t codepoint) noexcept {
return codepoint <= 0x0010FFFFu
&& !(0xD800u <= codepoint && codepoint <= 0xDBFFu);
}

/**
* @internal
* @brief Safely checks and advances a UTF-8 sequence iterator to the start of the next unicode codepoint
* @param[in] it iterator to be advanced
* @param[in] end iterator pointing to the end of the range
* @return True if the advance operation was successful and the advanced codepoint was a valid UTF-8 sequence */
template <typename Iterator>
[[nodiscard]] constexpr bool advanceUTF8(Iterator& it, Iterator end,
uint32_t& codepoint) noexcept {
if (it == end)
return false;

const auto length = sequenceLength(*it);
if (length == 0)
return false;

if (std::distance(it, end) < length)
return false;

for (int i = 1; i < length; ++i) {
const auto trailByte = *(it + i);
if ((static_cast<uint8_t>(trailByte) & 0b1100'0000u) != 0b1000'0000u)
return false;
}

codepoint = 0;
switch (length) {
case 1:
codepoint |= *it++;
break;
case 2:
codepoint |= (*it++ & 0b0001'1111u) << 6u;
codepoint |= (*it++ & 0b0011'1111u);
break;
case 3:
codepoint |= (*it++ & 0b0000'1111u) << 12u;
codepoint |= (*it++ & 0b0011'1111u) << 6u;
codepoint |= (*it++ & 0b0011'1111u);
break;
case 4:
codepoint |= (*it++ & 0b0000'0111u) << 18u;
codepoint |= (*it++ & 0b0011'1111u) << 12u;
codepoint |= (*it++ & 0b0011'1111u) << 6u;
codepoint |= (*it++ & 0b0011'1111u);
break;
}

if (!isCodepointValid(codepoint))
return false;

if (isOverlongSequence(codepoint, length))
return false;

return true;
}

// Mostly self-contained text overlay class
// todo : comment
class VulkanTextOverlay
Expand Down Expand Up @@ -91,6 +194,7 @@ class VulkanTextOverlay
// todo : throw error
return 0;
}

public:

enum TextAlign { alignLeft, alignCenter, alignRight };
Expand Down Expand Up @@ -573,12 +677,20 @@ class VulkanTextOverlay

// Calculate text width
float textWidth = 0;
for (auto letter : text)
uint32_t codepoint;
auto it = text.begin();
while (it != text.end())
{
stb_fontchar *charData = &stbFontData[(uint32_t)letter - STB_FIRST_CHAR];
if (!advanceUTF8(it, text.end(), codepoint))
break;
// TODO: Get a UTF8 font. Consider changing to Dear ImGUI
// https://github.com/ocornut/imgui.
// Placeholder to avoid crashing.
if (codepoint > STB_NUM_CHARS + STB_FIRST_CHAR)
codepoint = STB_MISSING_GLPYH;
stb_fontchar *charData = &stbFontData[(uint32_t)codepoint - STB_FIRST_CHAR];
textWidth += charData->advance * charW;
}

switch (align)
{
case alignRight:
Expand All @@ -592,9 +704,15 @@ class VulkanTextOverlay
}

// Generate a uv mapped quad per char in the new text
for (auto letter : text)
it = text.begin();
while (it != text.end())
{
stb_fontchar *charData = &stbFontData[(uint32_t)letter - STB_FIRST_CHAR];
if (!advanceUTF8(it, text.end(), codepoint))
break;
if (codepoint > STB_NUM_CHARS + STB_FIRST_CHAR)
codepoint = STB_MISSING_GLPYH;

stb_fontchar *charData = &stbFontData[(uint32_t)codepoint - STB_FIRST_CHAR];

mappedLocal->x = (x + (float)charData->x0 * charW);
mappedLocal->y = (y + (float)charData->y0 * charH);
Expand Down Expand Up @@ -628,7 +746,6 @@ class VulkanTextOverlay
break; // Truncate the text.
}
}

// Unmap buffer and update command buffers
void endTextUpdate()
{
Expand Down
6 changes: 6 additions & 0 deletions tests/loadtests/glloadtests/shader-based/GL3LoadTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
* OpenGL ES 3.x
*/

#include <string>
#include <regex>

#include "GLLoadTests.h"
#include "EncodeTexture.h"
#include "DrawTexture.h"
Expand Down Expand Up @@ -71,6 +74,9 @@ GLLoadTests::showFile(std::string& filename)
createViewer = DrawTexture::create;
}
ktxTexture_Destroy(kTexture);

// Escape any spaces in filename.
filename = std::regex_replace( filename, std::regex(" "), "\\ " );
std::string args = "--external " + filename;
pViewer = createViewer(w_width, w_height, args.c_str(), sBasePath);
return pViewer;
Expand Down
6 changes: 6 additions & 0 deletions tests/loadtests/vkloadtests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,12 @@ add_executable( vkloadtests

set_code_sign(vkloadtests)

# If VulkanAppSDL is ever made into its own target change the target here.
target_compile_features(vkloadtests
PRIVATE
cxx_std_14
)

target_include_directories(vkloadtests
PRIVATE
${SDL2_INCLUDE_DIRS}
Expand Down
5 changes: 5 additions & 0 deletions tests/loadtests/vkloadtests/VulkanLoadTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

#define _USE_MATH_DEFINES
#include <math.h>
#include <string>
#include <regex>

#include "VulkanLoadTests.h"
#include "Texture.h"
Expand Down Expand Up @@ -355,6 +357,9 @@ VulkanLoadTests::showFile(std::string& filename)
createViewer = Texture::create;
}
ktxTexture_Destroy(kTexture);

// Escape any spaces in filename.
filename = std::regex_replace( filename, std::regex(" "), "\\ " );
std::string args = "--external " + filename;
pViewer = createViewer(vkctx, w_width, w_height, args.c_str(), sBasePath);
return pViewer;
Expand Down
50 changes: 43 additions & 7 deletions utils/argparser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
*/

#include <assert.h>
#include <iostream>
#include "argparser.h"
#include <regex>

using namespace std;

Expand All @@ -30,18 +32,52 @@ using namespace std;
*/
argvector::argvector(const string& sArgs)
{
const string sep(" \t\n\r\v\f");
const string sep(" \t");
// std::regex does not support negative lookbehind assertions. Darn!
// RE to extract argument between string start and separator.
// - Match 0 is whole matching string including separator.
// - Match 1 is the argument.
// - Match 2 is an empty string or a backslash.
// Use negative character class as it is easiest way to handle
// utf-8 file names with non-Latin characters. $ must not be last
// in the character class or it will be taken literally not as
// end of string.
// Use raw literal to avoid excess backslashes.
regex re(R"--(^([^$\\ \t]+)(\\?)(?:[ \t]+|$))--");
size_t pos;

pos = sArgs.find_first_not_of(sep);
assert(pos != string::npos);

do {
size_t epos = sArgs.find_first_of(sep, pos);
size_t len = epos == string::npos ? epos : epos - pos;
push_back(sArgs.substr(pos, len));
pos = sArgs.find_first_not_of(sep, epos);
} while (pos != string::npos);
auto first = sArgs.begin() + pos;
auto last = sArgs.end();
bool continuation = false;
for (smatch sm; regex_search(first, last, sm, re);) {
bool needContinuation = false;
#if DEBUG_REGEX
std::cout << "prefix: " << sm.prefix() << '\n';
std::cout << "suffix: " << sm.suffix() << '\n';
std::cout << "match size: " << sm.size() << '\n';
for(uint32_t i = 0; i < sm.size(); i++) {
std::cout << "match " << i << ": " << "\"" << sm.str(i) << "\"" << '\n';
}
#endif
string arg;
// All this because std::regex does not support negative
// lookbehind assertions.
arg = sm.str(1);
if (*sm[2].first == '\\') {
arg += " ";
needContinuation = true;
}
if (continuation) {
this->back() += arg;
} else {
push_back(arg);
}
continuation = needContinuation;
first = sm.suffix().first;
}
}

/*
Expand Down

0 comments on commit 8aa500a

Please sign in to comment.