Skip to content

Commit

Permalink
Merge pull request #1657 from kevinbackhouse/GHSA-w8mv-g8qq-36mj
Browse files Browse the repository at this point in the history
Fix quadratic complexity performance bug
  • Loading branch information
kevinbackhouse committed May 16, 2021
2 parents 8b53160 + c261fba commit 8c53e46
Showing 1 changed file with 36 additions and 21 deletions.
57 changes: 36 additions & 21 deletions xmpsdk/src/XMPMeta-Parse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -976,12 +976,26 @@ ProcessUTF8Portion ( XMLParserAdapter * xmlParser,
{
const XMP_Uns8 * bufEnd = buffer + length;

const XMP_Uns8 * spanStart = buffer;
const XMP_Uns8 * spanEnd;

// `buffer` is copied into this std::string. If `buffer` only
// contains valid UTF-8 and no escape characters, then the copy
// will be identical to the original, but invalid characters are
// replaced - usually with a space character. This std::string was
// added as a performance fix for:
// https://github.com/Exiv2/exiv2/security/advisories/GHSA-w8mv-g8qq-36mj
// Previously, the code was repeatedly calling
// `xmlParser->ParseBuffer()`, which turned out to have quadratic
// complexity, because expat kept reparsing the entire string from
// the beginning.
std::string copy;

for ( spanEnd = spanStart; spanEnd < bufEnd; ++spanEnd ) {
for ( spanEnd = buffer; spanEnd < bufEnd; ++spanEnd ) {

if ( (0x20 <= *spanEnd) && (*spanEnd <= 0x7E) && (*spanEnd != '&') ) continue; // A regular ASCII character.
if ( (0x20 <= *spanEnd) && (*spanEnd <= 0x7E) && (*spanEnd != '&') ) {
copy.push_back(*spanEnd);
continue; // A regular ASCII character.
}

if ( *spanEnd >= 0x80 ) {

Expand All @@ -992,33 +1006,33 @@ ProcessUTF8Portion ( XMLParserAdapter * xmlParser,
if ( uniLen > 0 ) {

// A valid UTF-8 character, keep it as-is.
copy.append((const char*)spanEnd, uniLen);
spanEnd += uniLen - 1; // ! The loop increment will put back the +1.

} else if ( (uniLen < 0) && (! last) ) {

// Have a partial UTF-8 character at the end of the buffer and more input coming.
xmlParser->ParseBuffer ( spanStart, (spanEnd - spanStart), false );
xmlParser->ParseBuffer ( copy.c_str(), copy.size(), false );
return (spanEnd - buffer);

} else {

// Not a valid UTF-8 sequence. Replace the first byte with the Latin-1 equivalent.
xmlParser->ParseBuffer ( spanStart, (spanEnd - spanStart), false );
const char * replacement = kReplaceLatin1 [ *spanEnd - 0x80 ];
xmlParser->ParseBuffer ( replacement, strlen ( replacement ), false );
spanStart = spanEnd + 1; // ! The loop increment will do "spanEnd = spanStart".
copy.append ( replacement );

}

} else if ( (*spanEnd < 0x20) || (*spanEnd == 0x7F) ) {

// Replace ASCII controls other than tab, LF, and CR with a space.

if ( (*spanEnd == kTab) || (*spanEnd == kLF) || (*spanEnd == kCR) ) continue;
if ( (*spanEnd == kTab) || (*spanEnd == kLF) || (*spanEnd == kCR) ) {
copy.push_back(*spanEnd);
continue;
}

xmlParser->ParseBuffer ( spanStart, (spanEnd - spanStart), false );
xmlParser->ParseBuffer ( " ", 1, false );
spanStart = spanEnd + 1; // ! The loop increment will do "spanEnd = spanStart".
copy.push_back(' ');

} else {

Expand All @@ -1030,29 +1044,30 @@ ProcessUTF8Portion ( XMLParserAdapter * xmlParser,
if ( escLen < 0 ) {

// Have a partial numeric escape in this buffer, wait for more input.
if ( last ) continue; // No more buffers, not an escape, absorb as normal input.
xmlParser->ParseBuffer ( spanStart, (spanEnd - spanStart), false );
if ( last ) {
copy.push_back('&');
continue; // No more buffers, not an escape, absorb as normal input.
}
xmlParser->ParseBuffer ( copy.c_str(), copy.size(), false );
return (spanEnd - buffer);

} else if ( escLen > 0 ) {

// Have a complete numeric escape to replace.
xmlParser->ParseBuffer ( spanStart, (spanEnd - spanStart), false );
xmlParser->ParseBuffer ( " ", 1, false );
spanStart = spanEnd + escLen;
spanEnd = spanStart - 1; // ! The loop continuation will increment spanEnd!
copy.push_back(' ');
spanEnd = spanEnd + escLen - 1; // ! The loop continuation will increment spanEnd!

} else {
copy.push_back('&');
}

}

}

XMP_Assert ( spanEnd == bufEnd );

if ( spanStart < bufEnd ) xmlParser->ParseBuffer ( spanStart, (spanEnd - spanStart), false );
if ( last ) xmlParser->ParseBuffer ( " ", 1, true );

copy.push_back(' ');
xmlParser->ParseBuffer ( copy.c_str(), copy.size(), true );
return length;

} // ProcessUTF8Portion
Expand Down

0 comments on commit 8c53e46

Please sign in to comment.