Skip to content

Commit

Permalink
[C23] Implement WG14 N2653 char8_t: A type for UTF-8 characters and s…
Browse files Browse the repository at this point in the history
…trings
  • Loading branch information
AaronBallman committed Jul 15, 2024
1 parent 2905372 commit f820346
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 2 deletions.
7 changes: 7 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,13 @@ C23 Feature Support
- Added the ``FLT_NORM_MAX``, ``DBL_NORM_MAX``, and ``LDBL_NORM_MAX`` to the
freestanding implementation of ``<float.h>`` that ships with Clang.

- Implemented support for
`WG14 N2653 <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2653.htm>`_
which changes the underlying type of ``u8`` character and string constants
from ``char`` to ``char8_t``, which is a type defined in ``<uchar.h>`` as
being the same type as ``unsigned char``. Also adds the ``atomic_char8_t``
type and the ``ATOMIC_CHAR8_T_LOCK_FREE`` macro to ``<stdatomic.h>`` in C23
and later modes.

Non-comprehensive list of changes in this release
-------------------------------------------------

Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Frontend/InitPreprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1349,7 +1349,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
getLockFreeValue(TI.get##Type##Width(), TI));
DEFINE_LOCK_FREE_MACRO(BOOL, Bool);
DEFINE_LOCK_FREE_MACRO(CHAR, Char);
if (LangOpts.Char8)
if (LangOpts.Char8 || LangOpts.C23)
DEFINE_LOCK_FREE_MACRO(CHAR8_T, Char); // Treat char8_t like char.
DEFINE_LOCK_FREE_MACRO(CHAR16_T, Char16);
DEFINE_LOCK_FREE_MACRO(CHAR32_T, Char32);
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Headers/stdatomic.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ extern "C" {

#define ATOMIC_BOOL_LOCK_FREE __CLANG_ATOMIC_BOOL_LOCK_FREE
#define ATOMIC_CHAR_LOCK_FREE __CLANG_ATOMIC_CHAR_LOCK_FREE
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
#define ATOMIC_CHAR8_T_LOCK_FREE __CLANG_ATOMIC_CHAR8_T_LOCK_FREE
#endif
#define ATOMIC_CHAR16_T_LOCK_FREE __CLANG_ATOMIC_CHAR16_T_LOCK_FREE
#define ATOMIC_CHAR32_T_LOCK_FREE __CLANG_ATOMIC_CHAR32_T_LOCK_FREE
#define ATOMIC_WCHAR_T_LOCK_FREE __CLANG_ATOMIC_WCHAR_T_LOCK_FREE
Expand Down Expand Up @@ -104,6 +107,9 @@ typedef _Atomic(long) atomic_long;
typedef _Atomic(unsigned long) atomic_ulong;
typedef _Atomic(long long) atomic_llong;
typedef _Atomic(unsigned long long) atomic_ullong;
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
typedef _Atomic(unsigned char) atomic_char8_t;
#endif
typedef _Atomic(uint_least16_t) atomic_char16_t;
typedef _Atomic(uint_least32_t) atomic_char32_t;
typedef _Atomic(wchar_t) atomic_wchar_t;
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Sema/SemaExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2051,6 +2051,8 @@ Sema::ActOnStringLiteral(ArrayRef<Token> StringToks, Scope *UDLScope) {
} else if (Literal.isUTF8()) {
if (getLangOpts().Char8)
CharTy = Context.Char8Ty;
else if (getLangOpts().C23)
CharTy = Context.UnsignedCharTy;
Kind = StringLiteralKind::UTF8;
} else if (Literal.isUTF16()) {
CharTy = Context.Char16Ty;
Expand Down
39 changes: 39 additions & 0 deletions clang/test/C/C23/n2653.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// RUN: %clang_cc1 -verify=pre-c23 -ffreestanding -std=c17 %s
// RUN: %clang_cc1 -verify -ffreestanding -std=c23 %s

/* WG14 N2653: Clang 19
* char8_t: A type for UTF-8 characters and strings
*/

// expected-no-diagnostics

#include <stdatomic.h>

typedef unsigned char char8_t; // in <uchar.h>, which Clang does not provide.

#if __STDC_VERSION__ >= 202311L
#define LITERAL_TYPE char8_t
#define LITERAL_UNDERLYING_TYPE unsigned char

// Ensure that char8_t has the same lock-free capabilities as unsigned char.
#if defined(ATOMIC_CHAR8_T_LOCK_FREE) != defined(ATOMIC_CHAR_LOCK_FREE) || \
ATOMIC_CHAR8_T_LOCK_FREE != ATOMIC_CHAR_LOCK_FREE
#error "invalid char8_t atomic lock free status"
#endif

#else
#define LITERAL_TYPE char
#define LITERAL_UNDERLYING_TYPE char

// Ensure we don't define the lock-free status in earlier modes.
#if defined(ATOMIC_CHAR8_T_LOCK_FREE)
#error "ATOMIC_CHAR8_T_LOCK_FREE should not be defined"
#endif
#endif

// Ensure we get the type of the literal correct.
_Static_assert(_Generic(u8""[0], LITERAL_TYPE : 1, default : 0), "");
_Static_assert(_Generic(u8""[0], LITERAL_UNDERLYING_TYPE : 1, default : 0), "");

// Ensure we have a datatype for atomic operations.
atomic_char8_t val; // pre-c23-error {{unknown type name 'atomic_char8_t'}}
2 changes: 1 addition & 1 deletion clang/www/c_status.html
Original file line number Diff line number Diff line change
Expand Up @@ -1066,7 +1066,7 @@ <h2 id="c2x">C23 implementation status</h2>
<tr>
<td>char8_t: A type for UTF-8 characters and strings</td>
<td><a href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2653.htm">N2653</a></td>
<td class="none" align="center">No</td>
<td class="unreleased" align="center">Clang 19</td>
</tr>
<tr>
<td>Clarification for max exponent macros-update</td>
Expand Down

0 comments on commit f820346

Please sign in to comment.