From ea92b1f9d0fc31f1fd97ad04eb0412003a37cb0d Mon Sep 17 00:00:00 2001 From: Karl-Johan Karlsson Date: Sun, 31 Mar 2024 11:09:52 +0200 Subject: [PATCH] [Sema] Implement support for -Wformat-signedness (#74440) In gcc there exist a modifier option -Wformat-signedness that turns on additional signedness warnings in the already existing -Wformat warning. This patch implements that support in clang. This is done by adding a dummy warning diag::warn_format_conversion_argument_type_mismatch_signedness that is never emitted and only used as an option to toggle the signedness warning in -Wformat. This will ensure gcc compatibility. --- clang/docs/ReleaseNotes.rst | 3 + clang/include/clang/AST/FormatString.h | 2 + clang/include/clang/Basic/DiagnosticGroups.td | 1 + .../clang/Basic/DiagnosticSemaKinds.td | 3 + clang/lib/AST/FormatString.cpp | 29 ++- clang/lib/Sema/SemaChecking.cpp | 33 ++- .../Sema/format-strings-signedness-fixit.c | 98 ++++++++ clang/test/Sema/format-strings-signedness.c | 222 ++++++++++++++++++ 8 files changed, 379 insertions(+), 12 deletions(-) create mode 100644 clang/test/Sema/format-strings-signedness-fixit.c create mode 100644 clang/test/Sema/format-strings-signedness.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 37b843915a0dee..76eaf0bf11c303 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -310,6 +310,9 @@ Improvements to Clang's diagnostics - Clang now no longer diagnoses type definitions in ``offsetof`` in C23 mode. Fixes #GH83658. +- New ``-Wformat-signedness`` diagnostic that warn if the format string requires an + unsigned argument and the argument is signed and vice versa. + Improvements to Clang's time-trace ---------------------------------- diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h index e2232fb4a47153..a074dd23e2ad4c 100644 --- a/clang/include/clang/AST/FormatString.h +++ b/clang/include/clang/AST/FormatString.h @@ -284,6 +284,8 @@ class ArgType { /// The conversion specifier and the argument type are disallowed by the C /// standard, but are in practice harmless. For instance, "%p" and int*. NoMatchPedantic, + /// The conversion specifier and the argument type have different sign. + NoMatchSignedness, /// The conversion specifier and the argument type are compatible, but still /// seems likely to be an error. For instance, "%hd" and _Bool. NoMatchTypeConfusion, diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 44035e2fd16f2e..520168f01fd846 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -985,6 +985,7 @@ def FormatSecurity : DiagGroup<"format-security">; def FormatNonStandard : DiagGroup<"format-non-iso">; def FormatY2K : DiagGroup<"format-y2k">; def FormatPedantic : DiagGroup<"format-pedantic">; +def FormatSignedness : DiagGroup<"format-signedness">; def FormatTypeConfusion : DiagGroup<"format-type-confusion">; def FormatOverflowNonKprintf: DiagGroup<"format-overflow-non-kprintf">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index fbc9c04dfba939..df57f5e6ce11ba 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -9821,6 +9821,9 @@ def warn_format_conversion_argument_type_mismatch : Warning< def warn_format_conversion_argument_type_mismatch_pedantic : Extension< warn_format_conversion_argument_type_mismatch.Summary>, InGroup; +def warn_format_conversion_argument_type_mismatch_signedness : Warning< + warn_format_conversion_argument_type_mismatch.Summary>, + InGroup, DefaultIgnore; def warn_format_conversion_argument_type_mismatch_confusion : Warning< warn_format_conversion_argument_type_mismatch.Summary>, InGroup, DefaultIgnore; diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp index 0c80ad109ccbb2..da8164bad518ec 100644 --- a/clang/lib/AST/FormatString.cpp +++ b/clang/lib/AST/FormatString.cpp @@ -413,7 +413,7 @@ ArgType::matchesType(ASTContext &C, QualType argTy) const { return Match; if (const auto *BT = argTy->getAs()) { // Check if the only difference between them is signed vs unsigned - // if true, we consider they are compatible. + // if true, return match signedness. switch (BT->getKind()) { default: break; @@ -423,44 +423,53 @@ ArgType::matchesType(ASTContext &C, QualType argTy) const { [[fallthrough]]; case BuiltinType::Char_S: case BuiltinType::SChar: + if (T == C.UnsignedShortTy || T == C.ShortTy) + return NoMatchTypeConfusion; + if (T == C.UnsignedCharTy) + return NoMatchSignedness; + if (T == C.SignedCharTy) + return Match; + break; case BuiltinType::Char_U: case BuiltinType::UChar: if (T == C.UnsignedShortTy || T == C.ShortTy) return NoMatchTypeConfusion; - if (T == C.UnsignedCharTy || T == C.SignedCharTy) + if (T == C.UnsignedCharTy) return Match; + if (T == C.SignedCharTy) + return NoMatchSignedness; break; case BuiltinType::Short: if (T == C.UnsignedShortTy) - return Match; + return NoMatchSignedness; break; case BuiltinType::UShort: if (T == C.ShortTy) - return Match; + return NoMatchSignedness; break; case BuiltinType::Int: if (T == C.UnsignedIntTy) - return Match; + return NoMatchSignedness; break; case BuiltinType::UInt: if (T == C.IntTy) - return Match; + return NoMatchSignedness; break; case BuiltinType::Long: if (T == C.UnsignedLongTy) - return Match; + return NoMatchSignedness; break; case BuiltinType::ULong: if (T == C.LongTy) - return Match; + return NoMatchSignedness; break; case BuiltinType::LongLong: if (T == C.UnsignedLongLongTy) - return Match; + return NoMatchSignedness; break; case BuiltinType::ULongLong: if (T == C.LongLongTy) - return Match; + return NoMatchSignedness; break; } // "Partially matched" because of promotions? diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 2684535d8e53d1..11401b6f56c0ea 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -12446,6 +12446,19 @@ isArithmeticArgumentPromotion(Sema &S, const ImplicitCastExpr *ICE) { S.Context.getFloatingTypeOrder(From, To) < 0; } +static analyze_format_string::ArgType::MatchKind +handleFormatSignedness(analyze_format_string::ArgType::MatchKind Match, + DiagnosticsEngine &Diags, SourceLocation Loc) { + if (Match == analyze_format_string::ArgType::NoMatchSignedness) { + Match = + Diags.isIgnored( + diag::warn_format_conversion_argument_type_mismatch_signedness, Loc) + ? analyze_format_string::ArgType::Match + : analyze_format_string::ArgType::NoMatch; + } + return Match; +} + bool CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS, const char *StartSpecifier, @@ -12489,6 +12502,9 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS, ArgType::MatchKind ImplicitMatch = ArgType::NoMatch; ArgType::MatchKind Match = AT.matchesType(S.Context, ExprTy); + ArgType::MatchKind OrigMatch = Match; + + Match = handleFormatSignedness(Match, S.getDiagnostics(), E->getExprLoc()); if (Match == ArgType::Match) return true; @@ -12512,6 +12528,14 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS, ICE->getType() == S.Context.UnsignedIntTy) { // All further checking is done on the subexpression ImplicitMatch = AT.matchesType(S.Context, ExprTy); + if (OrigMatch == ArgType::NoMatchSignedness && + ImplicitMatch != ArgType::NoMatchSignedness) + // If the original match was a signedness match this match on the + // implicit cast type also need to be signedness match otherwise we + // might introduce new unexpected warnings from -Wformat-signedness. + return true; + ImplicitMatch = handleFormatSignedness( + ImplicitMatch, S.getDiagnostics(), E->getExprLoc()); if (ImplicitMatch == ArgType::Match) return true; } @@ -12633,6 +12657,7 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS, case ArgType::Match: case ArgType::MatchPromotion: case ArgType::NoMatchPromotionTypeConfusion: + case ArgType::NoMatchSignedness: llvm_unreachable("expected non-matching"); case ArgType::NoMatchPedantic: Diag = diag::warn_format_conversion_argument_type_mismatch_pedantic; @@ -12668,8 +12693,10 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS, CastFix << (S.LangOpts.CPlusPlus ? ">" : ")"); SmallVector Hints; - if (AT.matchesType(S.Context, IntendedTy) != ArgType::Match || - ShouldNotPrintDirectly) + ArgType::MatchKind IntendedMatch = AT.matchesType(S.Context, IntendedTy); + IntendedMatch = handleFormatSignedness(IntendedMatch, S.getDiagnostics(), + E->getExprLoc()); + if ((IntendedMatch != ArgType::Match) || ShouldNotPrintDirectly) Hints.push_back(FixItHint::CreateReplacement(SpecRange, os.str())); if (const CStyleCastExpr *CCast = dyn_cast(E)) { @@ -12738,6 +12765,7 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS, case ArgType::Match: case ArgType::MatchPromotion: case ArgType::NoMatchPromotionTypeConfusion: + case ArgType::NoMatchSignedness: llvm_unreachable("expected non-matching"); case ArgType::NoMatchPedantic: Diag = diag::warn_format_conversion_argument_type_mismatch_pedantic; @@ -12949,6 +12977,7 @@ bool CheckScanfHandler::HandleScanfSpecifier( analyze_format_string::ArgType::MatchKind Match = AT.matchesType(S.Context, Ex->getType()); + Match = handleFormatSignedness(Match, S.getDiagnostics(), Ex->getExprLoc()); bool Pedantic = Match == analyze_format_string::ArgType::NoMatchPedantic; if (Match == analyze_format_string::ArgType::Match) return true; diff --git a/clang/test/Sema/format-strings-signedness-fixit.c b/clang/test/Sema/format-strings-signedness-fixit.c new file mode 100644 index 00000000000000..b4e6e975657aae --- /dev/null +++ b/clang/test/Sema/format-strings-signedness-fixit.c @@ -0,0 +1,98 @@ +// RUN: cp %s %t +// RUN: %clang_cc1 -triple=x86_64-pc-linux-gnu -Wformat -Wformat-signedness -fixit %t +// RUN: %clang_cc1 -triple=x86_64-pc-linux-gnu -fsyntax-only -Wformat -Wformat-signedness -Werror %t +// RUN: %clang_cc1 -triple=x86_64-pc-linux-gnu -E -o - %t | FileCheck %s + +#include + +int printf(const char *restrict format, ...); + +void test_printf_int(int x) +{ + printf("%u", x); +} + +void test_printf_unsigned(unsigned x) +{ + printf("%d", x); +} + +void test_printf_long(long x) +{ + printf("%lu", x); +} + +void test_printf_unsigned_long(unsigned long x) +{ + printf("%ld", x); +} + +void test_printf_long_long(long long x) +{ + printf("%llu", x); +} + +void test_printf_unsigned_long_long(unsigned long long x) +{ + printf("%lld", x); +} + +enum enum_int { + minus_1 = -1 +}; + +void test_printf_enum_int(enum enum_int x) +{ + printf("%u", x); +} + +enum enum_unsigned { + zero = 0 +}; + +void test_printf_enum_unsigned(enum enum_unsigned x) +{ + printf("%d", x); +} + +enum enum_long { + minus_one = -1, + int_val = INT_MAX, + unsigned_val = (unsigned)INT_MIN +}; + +void test_printf_enum_long(enum enum_long x) +{ + printf("%lu", x); +} + +enum enum_unsigned_long { + uint_max_plus = (unsigned long)UINT_MAX+1, +}; + +void test_printf_enum_unsigned_long(enum enum_unsigned_long x) +{ + printf("%ld", x); +} + +// Validate the fixes. +// CHECK: void test_printf_int(int x) +// CHECK: printf("%d", x); +// CHECK: void test_printf_unsigned(unsigned x) +// CHECK: printf("%u", x); +// CHECK: void test_printf_long(long x) +// CHECK: printf("%ld", x); +// CHECK: void test_printf_unsigned_long(unsigned long x) +// CHECK: printf("%lu", x); +// CHECK: void test_printf_long_long(long long x) +// CHECK: printf("%lld", x); +// CHECK: void test_printf_unsigned_long_long(unsigned long long x) +// CHECK: printf("%llu", x); +// CHECK: void test_printf_enum_int(enum enum_int x) +// CHECK: printf("%d", x); +// CHECK: void test_printf_enum_unsigned(enum enum_unsigned x) +// CHECK: printf("%u", x); +// CHECK: void test_printf_enum_long(enum enum_long x) +// CHECK: printf("%ld", x); +// CHECK: void test_printf_enum_unsigned_long(enum enum_unsigned_long x) +// CHECK: printf("%lu", x); diff --git a/clang/test/Sema/format-strings-signedness.c b/clang/test/Sema/format-strings-signedness.c new file mode 100644 index 00000000000000..d5a8140d9ef8a0 --- /dev/null +++ b/clang/test/Sema/format-strings-signedness.c @@ -0,0 +1,222 @@ +// RUN: %clang_cc1 -triple=x86_64-pc-linux-gnu -std=c11 -fsyntax-only -verify -Wformat -Wformat-signedness %s +// RUN: %clang_cc1 -triple=x86_64-pc-win32 -std=c11 -fsyntax-only -verify -Wformat -Wformat-signedness %s + +// Verify that -Wformat-signedness alone (without -Wformat) trigger the +// warnings. Note in gcc this will not trigger the signedness warnings as +// -Wformat is default off in gcc. +// RUN: %clang_cc1 -triple=x86_64-pc-linux-gnu -std=c11 -fsyntax-only -verify -Wformat-signedness %s +// RUN: %clang_cc1 -triple=x86_64-pc-win32 -std=c11 -fsyntax-only -verify -Wformat-signedness %s + +// Verify that -Wformat-signedness warnings are not reported with only -Wformat +// (gcc compat). +// RUN: %clang_cc1 -triple=x86_64-pc-linux-gnu -std=c11 -fsyntax-only -Wformat -verify=okay %s + +// Verify that -Wformat-signedness with -Wno-format are not reported (gcc compat). +// RUN: %clang_cc1 -triple=x86_64-pc-linux-gnu -std=c11 -fsyntax-only -Wformat-signedness -Wno-format -verify=okay %s +// RUN: %clang_cc1 -triple=x86_64-pc-linux-gnu -std=c11 -fsyntax-only -Wno-format -Wformat-signedness -verify=okay %s +// okay-no-diagnostics + +int printf(const char *restrict format, ...); +int scanf(const char * restrict, ...); + +void test_printf_bool(_Bool x) +{ + printf("%d", x); // no-warning + printf("%u", x); // no-warning + printf("%x", x); // no-warning +} + +void test_printf_char(char x) +{ + printf("%c", x); // no-warning +} + +void test_printf_unsigned_char(unsigned char x) +{ + printf("%c", x); // no-warning +} + +void test_printf_int(int x) +{ + printf("%d", x); // no-warning + printf("%u", x); // expected-warning{{format specifies type 'unsigned int' but the argument has type 'int'}} + printf("%x", x); // expected-warning{{format specifies type 'unsigned int' but the argument has type 'int'}} +} + +void test_printf_unsigned(unsigned x) +{ + printf("%d", x); // expected-warning{{format specifies type 'int' but the argument has type 'unsigned int'}} + printf("%u", x); // no-warning + printf("%x", x); // no-warning +} + +void test_printf_long(long x) +{ + printf("%ld", x); // no-warning + printf("%lu", x); // expected-warning{{format specifies type 'unsigned long' but the argument has type 'long'}} + printf("%lx", x); // expected-warning{{format specifies type 'unsigned long' but the argument has type 'long'}} +} + +void test_printf_unsigned_long(unsigned long x) +{ + printf("%ld", x); // expected-warning{{format specifies type 'long' but the argument has type 'unsigned long'}} + printf("%lu", x); // no-warning + printf("%lx", x); // no-warning +} + +void test_printf_long_long(long long x) +{ + printf("%lld", x); // no-warning + printf("%llu", x); // expected-warning{{format specifies type 'unsigned long long' but the argument has type 'long long'}} + printf("%llx", x); // expected-warning{{format specifies type 'unsigned long long' but the argument has type 'long long'}} +} + +void test_printf_unsigned_long_long(unsigned long long x) +{ + printf("%lld", x); // expected-warning{{format specifies type 'long long' but the argument has type 'unsigned long long'}} + printf("%llu", x); // no-warning + printf("%llx", x); // no-warning +} + +enum enum_int { + minus_1 = -1 +}; + +void test_printf_enum_int(enum enum_int x) +{ + printf("%d", x); // no-warning + printf("%u", x); // expected-warning{{format specifies type 'unsigned int' but the argument has underlying type 'int'}} + printf("%x", x); // expected-warning{{format specifies type 'unsigned int' but the argument has underlying type 'int'}} +} + +#ifndef _WIN32 // Disabled due to enums have different underlying type on _WIN32 +enum enum_unsigned { + zero = 0 +}; + +void test_printf_enum_unsigned(enum enum_unsigned x) +{ + printf("%d", x); // expected-warning{{format specifies type 'int' but the argument has underlying type 'unsigned int'}} + printf("%u", x); // no-warning + printf("%x", x); // no-warning +} + +enum enum_long { + minus_one = -1, + int_val = __INT_MAX__, // INT_MAX + unsigned_val = (unsigned)(-__INT_MAX__ -1) // (unsigned)INT_MIN +}; + +void test_printf_enum_long(enum enum_long x) +{ + printf("%ld", x); // no-warning + printf("%lu", x); // expected-warning{{format specifies type 'unsigned long' but the argument has underlying type 'long'}} + printf("%lx", x); // expected-warning{{format specifies type 'unsigned long' but the argument has underlying type 'long'}} +} + +enum enum_unsigned_long { + uint_max_plus = (unsigned long)(__INT_MAX__ *2U +1U)+1, // (unsigned long)UINT_MAX+1 +}; + +void test_printf_enum_unsigned_long(enum enum_unsigned_long x) +{ + printf("%ld", x); // expected-warning{{format specifies type 'long' but the argument has underlying type 'unsigned long'}} + printf("%lu", x); // no-warning + printf("%lx", x); // no-warning +} +#endif + +void test_scanf_char(char *y) { + scanf("%c", y); // no-warning +} + +void test_scanf_unsigned_char(unsigned char *y) { + scanf("%c", y); // no-warning +} + +void test_scanf_int(int *x) { + scanf("%d", x); // no-warning + scanf("%u", x); // expected-warning{{format specifies type 'unsigned int *' but the argument has type 'int *'}} + scanf("%x", x); // expected-warning{{format specifies type 'unsigned int *' but the argument has type 'int *'}} +} + +void test_scanf_unsigned(unsigned *x) { + scanf("%d", x); // expected-warning{{format specifies type 'int *' but the argument has type 'unsigned int *'}} + scanf("%u", x); // no-warning + scanf("%x", x); // no-warning +} + +void test_scanf_long(long *x) { + scanf("%ld", x); // no-warning + scanf("%lu", x); // expected-warning{{format specifies type 'unsigned long *' but the argument has type 'long *'}} + scanf("%lx", x); // expected-warning{{format specifies type 'unsigned long *' but the argument has type 'long *'}} +} + +void test_scanf_unsigned_long(unsigned long *x) { + scanf("%ld", x); // expected-warning{{format specifies type 'long *' but the argument has type 'unsigned long *'}} + scanf("%lu", x); // no-warning + scanf("%lx", x); // no-warning +} + +void test_scanf_longlong(long long *x) { + scanf("%lld", x); // no-warning + scanf("%llu", x); // expected-warning{{format specifies type 'unsigned long long *' but the argument has type 'long long *'}} + scanf("%llx", x); // expected-warning{{format specifies type 'unsigned long long *' but the argument has type 'long long *'}} +} + +void test_scanf_unsigned_longlong(unsigned long long *x) { + scanf("%lld", x); // expected-warning{{format specifies type 'long long *' but the argument has type 'unsigned long long *'}} + scanf("%llu", x); // no-warning + scanf("%llx", x); // no-warning +} + +void test_scanf_enum_int(enum enum_int *x) { + scanf("%d", x); // no-warning + scanf("%u", x); // expected-warning{{format specifies type 'unsigned int *' but the argument has type 'enum enum_int *'}} + scanf("%x", x); // expected-warning{{format specifies type 'unsigned int *' but the argument has type 'enum enum_int *'}} +} + +#ifndef _WIN32 // Disabled due to enums have different underlying type on _WIN32 +void test_scanf_enum_unsigned(enum enum_unsigned *x) { + scanf("%d", x); // expected-warning{{format specifies type 'int *' but the argument has type 'enum enum_unsigned *'}} + scanf("%u", x); // no-warning + scanf("%x", x); // no-warning +} + +void test_scanf_enum_long(enum enum_long *x) { + scanf("%ld", x); // no-warning + scanf("%lu", x); // expected-warning{{format specifies type 'unsigned long *' but the argument has type 'enum enum_long *'}} + scanf("%lx", x); // expected-warning{{format specifies type 'unsigned long *' but the argument has type 'enum enum_long *'}} +} + +void test_scanf_enum_unsigned_long(enum enum_unsigned_long *x) { + scanf("%ld", x); // expected-warning{{format specifies type 'long *' but the argument has type 'enum enum_unsigned_long *'}} + scanf("%lu", x); // no-warning + scanf("%lx", x); // no-warning +} +#endif + +// Verify that we get no warnings from + +typedef short int int16_t; +typedef unsigned short int uint16_t; + +void test_printf_priX16(int16_t x) { + printf("PRId16: %" "d" /*PRId16*/ "\n", x); // no-warning + printf("PRIi16: %" "i" /*PRIi16*/ "\n", x); // no-warning +} + +void test_printf_unsigned_priX16(uint16_t x) { + printf("PRIo16: %" "o" /*PRIo16*/ "\n", x); // no-warning + printf("PRIu16: %" "u" /*PRIu16*/ "\n", x); // no-warning + printf("PRIx16: %" "x" /*PRIx16*/ "\n", x); // no-warning + printf("PRIX16: %" "X" /*PRIX16*/ "\n", x); // no-warning +} + +// Verify that we can suppress a -Wformat-signedness warning by ignoring +// -Wformat (gcc compat). +void test_suppress(int x) +{ +#pragma GCC diagnostic ignored "-Wformat" + printf("%u", x); +}