diff --git a/src/Norm/base/JsonLex.inc b/src/Norm/base/JsonLex.inc index 2a9b4fc5..9225f8e6 100644 --- a/src/Norm/base/JsonLex.inc +++ b/src/Norm/base/JsonLex.inc @@ -1116,17 +1116,14 @@ YY_RULE_SETUP // On retourne la chaine convertie en cas de succes if (bOk) return STRINGVALUE; - // Sinon, on retourne la chaine originelle + // Sinon, on retourne la chaine convertie au mieux, mais en tant que chaine erronnee else - { - *sValue = (char*)&yytext[1]; return STRINGERROR; - } } YY_BREAK case 11: YY_RULE_SETUP -#line 70 "JsonLex.lex" +#line 67 "JsonLex.lex" { char* endptr; double dValue; @@ -1139,12 +1136,12 @@ YY_RULE_SETUP case 12: /* rule 12 can match eol */ YY_RULE_SETUP -#line 79 "JsonLex.lex" +#line 76 "JsonLex.lex" {/*IGNORE*/} YY_BREAK case 13: YY_RULE_SETUP -#line 81 "JsonLex.lex" +#line 78 "JsonLex.lex" { ALString *sValue; @@ -1155,10 +1152,10 @@ YY_RULE_SETUP YY_BREAK case 14: YY_RULE_SETUP -#line 89 "JsonLex.lex" +#line 86 "JsonLex.lex" ECHO; YY_BREAK -#line 1161 "C:/Applications/boullema/DevGit/khiops/src/Norm/base/JsonLex.inc" +#line 1158 "C:/Applications/boullema/DevGit/khiops/src/Norm/base/JsonLex.inc" case YY_STATE_EOF(INITIAL): yyterminate(); @@ -2175,5 +2172,5 @@ void yyfree (void * ptr ) #define YYTABLES_NAME "yytables" -#line 89 "JsonLex.lex" +#line 86 "JsonLex.lex" diff --git a/src/Norm/base/JsonLex.lex b/src/Norm/base/JsonLex.lex index ce1c8c0f..bf8e7063 100644 --- a/src/Norm/base/JsonLex.lex +++ b/src/Norm/base/JsonLex.lex @@ -59,12 +59,9 @@ null {return NULLVALUE;} // On retourne la chaine convertie en cas de succes if (bOk) return STRINGVALUE; - // Sinon, on retourne la chaine originelle + // Sinon, on retourne la chaine convertie au mieux, mais en tant que chaine erronnee else - { - *sValue = (char*)&yytext[1]; return STRINGERROR; - } } {NUMBER} { diff --git a/src/Norm/base/TextService.cpp b/src/Norm/base/TextService.cpp index bf232ed0..cda128a1 100644 --- a/src/Norm/base/TextService.cpp +++ b/src/Norm/base/TextService.cpp @@ -400,6 +400,7 @@ boolean TextService::JsonToCString(const char* sJsonString, ALString& sCString) const char* sCharsToAdd; ALString sUnicodeChars; int nCharNumber; + int nUTF8CharLength; require(sJsonString != NULL); @@ -422,6 +423,7 @@ boolean TextService::JsonToCString(const char* sJsonString, ALString& sCString) { if (sInputString[nEnd] == '\\') { + // On concatene ce qui precede AppendSubString(sCString, sJsonString, nBegin, nEnd - nBegin); nEnd++; assert(nEnd < nLength); @@ -462,6 +464,7 @@ boolean TextService::JsonToCString(const char* sJsonString, ALString& sCString) { bOk = false; sCharsToAdd = "?"; + assert(nCharNumber == 1); break; } assert(nEnd < nLength); @@ -517,6 +520,7 @@ boolean TextService::JsonToCString(const char* sJsonString, ALString& sCString) { bOk = false; sCharsToAdd = "?"; + assert(nCharNumber == 1); break; } } @@ -540,9 +544,9 @@ boolean TextService::JsonToCString(const char* sJsonString, ALString& sCString) // En principe, impossible avec une chaine json correctement formee // Dans ce cas, on avance d'un caractere, avec une erreur bOk = false; - assert(nCharNumber == 1); sCharsToAdd = "?"; - nEnd++; + assert(nCharNumber == 1); + break; } if (nCharNumber == 1) sCString += sCharsToAdd[0]; @@ -553,14 +557,18 @@ boolean TextService::JsonToCString(const char* sJsonString, ALString& sCString) } else { - nEnd++; + nUTF8CharLength = GetValidUTF8CharLengthAt(sJsonString, nEnd); + if (nUTF8CharLength > 0) + nEnd += nUTF8CharLength; + else + { + // Caractere Utf8 invalide: on avance de 1 avec une erreur + bOk = false; + nEnd++; + } } } AppendSubString(sCString, sJsonString, nBegin, nEnd - nBegin); - - // Verification de l'encodage utf8, sauf si on a incorpore des caracteres ansi en mode ForceAnsi - if (bOk and not bContainsAnsiChars) - bOk = GetValidUTF8SubStringLength(sCString) == sCString.GetLength(); return bOk; } @@ -1016,18 +1024,17 @@ const ALString TextService::ToPrintable(const ALString& sBytes) return sPrintableBytes; } -int TextService::GetValidUTF8CharLengthAt(const ALString& sValue, int nStart) +int TextService::GetValidUTF8CharLengthAt(const char* sValue, int nStart) { int nUtf8CharLength; - int c; - int nLength; + unsigned char c; - require(0 <= nStart and nStart < sValue.GetLength()); + require(sValue != NULL); + require(0 <= nStart and sValue[nStart] != '\0'); // Initialisations nUtf8CharLength = 0; - nLength = sValue.GetLength(); - c = (unsigned char)sValue.GetAt(nStart); + c = (unsigned char)sValue[nStart]; // Cas d'un caractere ascii 0bbbbbbb if (0x00 <= c and c <= 0x7f) @@ -1035,7 +1042,7 @@ int TextService::GetValidUTF8CharLengthAt(const ALString& sValue, int nStart) // Debut d'un caractere UTF8 sur deux octets 110bbbbb else if ((c & 0xE0) == 0xC0) { - if (nStart + 1 < nLength and ((unsigned char)sValue.GetAt(nStart + 1) & 0xC0) == 0x80) + if (((unsigned char)sValue[nStart + 1] & 0xC0) == 0x80) nUtf8CharLength = 2; else nUtf8CharLength = 0; @@ -1043,8 +1050,9 @@ int TextService::GetValidUTF8CharLengthAt(const ALString& sValue, int nStart) // Debut d'un caractere UTF8 sur trois octets 1110bbbb else if ((c & 0xF0) == 0xE0) { - if (nStart + 2 < nLength and ((unsigned char)sValue.GetAt(nStart + 1) & 0xC0) == 0x80 and - ((unsigned char)sValue.GetAt(nStart + 2) & 0xC0) == 0x80) + // Test sans risque, puis le second caractere n'est pas teste si le premier vaut '\0' + if (((unsigned char)sValue[nStart + 1] & 0xC0) == 0x80 and + ((unsigned char)sValue[nStart + 2] & 0xC0) == 0x80) nUtf8CharLength = 3; else nUtf8CharLength = 0; @@ -1052,9 +1060,10 @@ int TextService::GetValidUTF8CharLengthAt(const ALString& sValue, int nStart) // Debut d'un caractere UTF8 sur quatre octets 11110bbb else if ((c & 0xF8) == 0xF0) { - if (nStart + 3 < nLength and ((unsigned char)sValue.GetAt(nStart + 1) & 0xC0) == 0x80 and - ((unsigned char)sValue.GetAt(nStart + 2) & 0xC0) == 0x80 and - ((unsigned char)sValue.GetAt(nStart + 3) & 0xC0) == 0x80) + // Test sans risque, puis le troisieme caractere n'est pas teste si un des permier vaut '\0' + if (((unsigned char)sValue[nStart + 1] & 0xC0) == 0x80 and + ((unsigned char)sValue[nStart + 2] & 0xC0) == 0x80 and + ((unsigned char)sValue[nStart + 3] & 0xC0) == 0x80) nUtf8CharLength = 4; else nUtf8CharLength = 0; @@ -1062,14 +1071,16 @@ int TextService::GetValidUTF8CharLengthAt(const ALString& sValue, int nStart) return nUtf8CharLength; } -int TextService::GetValidUTF8SubStringLength(const ALString& sValue) +int TextService::GetValidUTF8SubStringLength(const char* sValue) { - int nLength; int nUTF8CharLength; + int nLength; + + require(sValue != NULL); // Parcours de la chaine jusqu'au premiere catactere non UTF8 nLength = 0; - while (nLength < sValue.GetLength()) + while (sValue[nLength] != '\0') { nUTF8CharLength = GetValidUTF8CharLengthAt(sValue, nLength); if (nUTF8CharLength > 0) @@ -1077,8 +1088,8 @@ int TextService::GetValidUTF8SubStringLength(const ALString& sValue) else break; } - assert(nLength <= sValue.GetLength()); - assert(nLength == sValue.GetLength() or GetValidUTF8CharLengthAt(sValue, nLength) == 0); + assert(nLength <= (int)strlen(sValue)); + assert(nLength == (int)strlen(sValue) or GetValidUTF8CharLengthAt(sValue, nLength) == 0); return nLength; } diff --git a/src/Norm/base/TextService.h b/src/Norm/base/TextService.h index dc96fb65..44211de0 100644 --- a/src/Norm/base/TextService.h +++ b/src/Norm/base/TextService.h @@ -138,10 +138,10 @@ class TextService : public Object // Longueur en bytes d'un caractere UTF8 valide a partir d'une position donnee // Retourne 1 a 4 dans le cas d'un caractere valide, 0 sinon pour un caractere ANSI non encodable directement - static int GetValidUTF8CharLengthAt(const ALString& sValue, int nStart); + static int GetValidUTF8CharLengthAt(const char* sValue, int nStart); // Longueur en bytes de la sous-partie d'une chaine encodee avec des caracteres UTF8 valide - static int GetValidUTF8SubStringLength(const ALString& sValue); + static int GetValidUTF8SubStringLength(const char* sValue); // Construction d'un echantillon de textes basiques pour des tests static void BuildTextSample(StringVector* svTextValues);