-
Notifications
You must be signed in to change notification settings - Fork 278
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix quadratic complexity performance bug.
- Loading branch information
1 parent
8b53160
commit c261fba
Showing
1 changed file
with
36 additions
and
21 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -976,12 +976,26 @@ ProcessUTF8Portion ( XMLParserAdapter * xmlParser, | |
{ | ||
const XMP_Uns8 * bufEnd = buffer + length; | ||
|
||
const XMP_Uns8 * spanStart = buffer; | ||
const XMP_Uns8 * spanEnd; | ||
|
||
// `buffer` is copied into this std::string. If `buffer` only | ||
// contains valid UTF-8 and no escape characters, then the copy | ||
// will be identical to the original, but invalid characters are | ||
// replaced - usually with a space character. This std::string was | ||
// added as a performance fix for: | ||
// https://github.com/Exiv2/exiv2/security/advisories/GHSA-w8mv-g8qq-36mj | ||
// Previously, the code was repeatedly calling | ||
// `xmlParser->ParseBuffer()`, which turned out to have quadratic | ||
// complexity, because expat kept reparsing the entire string from | ||
// the beginning. | ||
std::string copy; | ||
|
||
for ( spanEnd = spanStart; spanEnd < bufEnd; ++spanEnd ) { | ||
for ( spanEnd = buffer; spanEnd < bufEnd; ++spanEnd ) { | ||
|
||
if ( (0x20 <= *spanEnd) && (*spanEnd <= 0x7E) && (*spanEnd != '&') ) continue; // A regular ASCII character. | ||
if ( (0x20 <= *spanEnd) && (*spanEnd <= 0x7E) && (*spanEnd != '&') ) { | ||
copy.push_back(*spanEnd); | ||
continue; // A regular ASCII character. | ||
} | ||
|
||
if ( *spanEnd >= 0x80 ) { | ||
|
||
|
@@ -992,33 +1006,33 @@ ProcessUTF8Portion ( XMLParserAdapter * xmlParser, | |
if ( uniLen > 0 ) { | ||
|
||
// A valid UTF-8 character, keep it as-is. | ||
copy.append((const char*)spanEnd, uniLen); | ||
spanEnd += uniLen - 1; // ! The loop increment will put back the +1. | ||
|
||
} else if ( (uniLen < 0) && (! last) ) { | ||
|
||
// Have a partial UTF-8 character at the end of the buffer and more input coming. | ||
xmlParser->ParseBuffer ( spanStart, (spanEnd - spanStart), false ); | ||
xmlParser->ParseBuffer ( copy.c_str(), copy.size(), false ); | ||
return (spanEnd - buffer); | ||
|
||
} else { | ||
|
||
// Not a valid UTF-8 sequence. Replace the first byte with the Latin-1 equivalent. | ||
xmlParser->ParseBuffer ( spanStart, (spanEnd - spanStart), false ); | ||
const char * replacement = kReplaceLatin1 [ *spanEnd - 0x80 ]; | ||
xmlParser->ParseBuffer ( replacement, strlen ( replacement ), false ); | ||
spanStart = spanEnd + 1; // ! The loop increment will do "spanEnd = spanStart". | ||
copy.append ( replacement ); | ||
|
||
} | ||
|
||
} else if ( (*spanEnd < 0x20) || (*spanEnd == 0x7F) ) { | ||
|
||
// Replace ASCII controls other than tab, LF, and CR with a space. | ||
|
||
if ( (*spanEnd == kTab) || (*spanEnd == kLF) || (*spanEnd == kCR) ) continue; | ||
if ( (*spanEnd == kTab) || (*spanEnd == kLF) || (*spanEnd == kCR) ) { | ||
copy.push_back(*spanEnd); | ||
continue; | ||
} | ||
|
||
xmlParser->ParseBuffer ( spanStart, (spanEnd - spanStart), false ); | ||
xmlParser->ParseBuffer ( " ", 1, false ); | ||
spanStart = spanEnd + 1; // ! The loop increment will do "spanEnd = spanStart". | ||
copy.push_back(' '); | ||
|
||
} else { | ||
|
||
|
@@ -1030,29 +1044,30 @@ ProcessUTF8Portion ( XMLParserAdapter * xmlParser, | |
if ( escLen < 0 ) { | ||
|
||
// Have a partial numeric escape in this buffer, wait for more input. | ||
if ( last ) continue; // No more buffers, not an escape, absorb as normal input. | ||
xmlParser->ParseBuffer ( spanStart, (spanEnd - spanStart), false ); | ||
if ( last ) { | ||
copy.push_back('&'); | ||
continue; // No more buffers, not an escape, absorb as normal input. | ||
} | ||
xmlParser->ParseBuffer ( copy.c_str(), copy.size(), false ); | ||
return (spanEnd - buffer); | ||
|
||
} else if ( escLen > 0 ) { | ||
|
||
// Have a complete numeric escape to replace. | ||
xmlParser->ParseBuffer ( spanStart, (spanEnd - spanStart), false ); | ||
xmlParser->ParseBuffer ( " ", 1, false ); | ||
spanStart = spanEnd + escLen; | ||
spanEnd = spanStart - 1; // ! The loop continuation will increment spanEnd! | ||
copy.push_back(' '); | ||
spanEnd = spanEnd + escLen - 1; // ! The loop continuation will increment spanEnd! | ||
|
||
} else { | ||
copy.push_back('&'); | ||
} | ||
|
||
} | ||
|
||
} | ||
|
||
XMP_Assert ( spanEnd == bufEnd ); | ||
|
||
if ( spanStart < bufEnd ) xmlParser->ParseBuffer ( spanStart, (spanEnd - spanStart), false ); | ||
if ( last ) xmlParser->ParseBuffer ( " ", 1, true ); | ||
|
||
copy.push_back(' '); | ||
xmlParser->ParseBuffer ( copy.c_str(), copy.size(), true ); | ||
This comment has been minimized.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong.
kevinbackhouse
Author
Collaborator
|
||
return length; | ||
|
||
} // ProcessUTF8Portion | ||
|
@kevinbackhouse , I know it's been a while, but should this be checking
last
?: