-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libc++] basic_ios<wchar_t> cannot store fill character WCHAR_MAX #89305
Conversation
@llvm/pr-subscribers-libcxx Author: Xing Xue (xingxue-ibm) ChangesThis is a continuation of Phabricator patch D124555.
The constraints of the C++ standard impose:
Compatibility with the platform C ABI imposes:
The library is implemented correctly with respect to the C++ standard, however, this requirement imposed by C++ on C types is not also imposed by C. An implementation cannot be compliant where This patch uses the [approach](https://reviews.llvm.org/D124555#3566746) suggested by @ldionne. Class Existing targets where this would break the ABI compatibility can opt out by setting Full diff: https://github.com/llvm/llvm-project/pull/89305.diff 2 Files Affected:
diff --git a/libcxx/include/ios b/libcxx/include/ios
index 00c1d5c2d4bc58..bc31f30f14acf0 100644
--- a/libcxx/include/ios
+++ b/libcxx/include/ios
@@ -224,6 +224,7 @@ storage-class-specifier const error_category& iostream_category() noexcept;
#include <__system_error/error_code.h>
#include <__system_error/error_condition.h>
#include <__system_error/system_error.h>
+#include <__type_traits/conditional.h>
#include <__utility/swap.h>
#include <__verbose_abort>
#include <version>
@@ -521,6 +522,31 @@ inline _LIBCPP_HIDE_FROM_ABI void ios_base::exceptions(iostate __iostate) {
clear(__rdstate_);
}
+template <class _Traits>
+// Attribute 'packed' is used to keep the layout compatible with the previous
+// definition of '__set_' in basic_ios on AIX.
+struct _LIBCPP_PACKED _OptionalFill {
+ _OptionalFill() : __set_(false) { }
+ _OptionalFill& operator=(typename _Traits::int_type __x) { __set_ = true; __fill_val_ = __x; return *this; }
+ bool __is_set() const { return __set_; }
+ typename _Traits::int_type __fill() const { return __fill_val_; }
+
+private:
+ typename _Traits::int_type __fill_val_;
+ bool __set_;
+};
+
+template <class _Traits>
+struct _LIBCPP_PACKED _SentinelValueFill {
+ _SentinelValueFill() : __fill_val_(_Traits::eof()) { }
+ _SentinelValueFill& operator=(typename _Traits::int_type __x) { __fill_val_ = __x; return *this; }
+ bool __is_set() const { return __fill_val_ != _Traits::eof(); }
+ typename _Traits::int_type __fill() const { return __fill_val_; }
+
+private:
+ typename _Traits::int_type __fill_val_;
+};
+
template <class _CharT, class _Traits>
class _LIBCPP_TEMPLATE_VIS basic_ios : public ios_base {
public:
@@ -590,7 +616,24 @@ protected:
private:
basic_ostream<char_type, traits_type>* __tie_;
- mutable int_type __fill_;
+
+#if defined(_AIX) || (defined(__MVS__) && defined(__64BIT__))
+// AIX and 64-bit MVS must use _OptionalFill for ABI backward compatibility.
+ using _FillType = _OptionalFill<_Traits>;
+#else
+#if defined(_WIN32)
+ static const bool _OptOutForABICompat = true;
+#else
+ static const bool _OptOutForABICompat = false;
+#endif
+
+ using _FillType = _If<
+ sizeof(char_type) >= sizeof(int_type) && !_OptOutForABICompat,
+ _OptionalFill<_Traits>,
+ _SentinelValueFill<_Traits>
+ >;
+#endif
+ mutable _FillType __fill_;
};
template <class _CharT, class _Traits>
@@ -605,7 +648,7 @@ template <class _CharT, class _Traits>
inline _LIBCPP_HIDE_FROM_ABI void basic_ios<_CharT, _Traits>::init(basic_streambuf<char_type, traits_type>* __sb) {
ios_base::init(__sb);
__tie_ = nullptr;
- __fill_ = traits_type::eof();
+ __fill_ = widen(' ');
}
template <class _CharT, class _Traits>
@@ -655,16 +698,16 @@ inline _LIBCPP_HIDE_FROM_ABI _CharT basic_ios<_CharT, _Traits>::widen(char __c)
template <class _CharT, class _Traits>
inline _LIBCPP_HIDE_FROM_ABI _CharT basic_ios<_CharT, _Traits>::fill() const {
- if (traits_type::eq_int_type(traits_type::eof(), __fill_))
+ if (!__fill_.__is_set())
__fill_ = widen(' ');
- return __fill_;
+ return __fill_.__fill();
}
template <class _CharT, class _Traits>
inline _LIBCPP_HIDE_FROM_ABI _CharT basic_ios<_CharT, _Traits>::fill(char_type __ch) {
- if (traits_type::eq_int_type(traits_type::eof(), __fill_))
+ if (!__fill_.__is_set())
__fill_ = widen(' ');
- char_type __r = __fill_;
+ char_type __r = __fill_.__fill();
__fill_ = __ch;
return __r;
}
diff --git a/libcxx/test/std/input.output/iostream.format/std.manip/setfill_eof.pass.cpp b/libcxx/test/std/input.output/iostream.format/std.manip/setfill_eof.pass.cpp
new file mode 100644
index 00000000000000..042e07cb80bbd0
--- /dev/null
+++ b/libcxx/test/std/input.output/iostream.format/std.manip/setfill_eof.pass.cpp
@@ -0,0 +1,31 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Test that weof as a wchar_t value can be set as the fill character.
+
+// UNSUPPORTED: no-wide-characters
+// REQUIRES: target=powerpc{{(64)?}}-ibm-aix || target=s390x-ibm-zos
+
+#include <iomanip>
+#include <ostream>
+#include <cassert>
+#include <string>
+
+template <class CharT>
+struct testbuf : public std::basic_streambuf<CharT> {
+ testbuf() {}
+};
+
+int main(int, char**) {
+ testbuf<wchar_t> sb;
+ std::wostream os(&sb);
+ os << std::setfill((wchar_t)std::char_traits<wchar_t>::eof());
+ assert(os.fill() == (wchar_t)std::char_traits<wchar_t>::eof());
+
+ return 0;
+}
|
…hanges are compatible with the current IBM provided libc++.
- update a comment
-define and use macros _LIBCXX_IOS_MAY_USE_OPTIONAL_FILL and _LIBCXX_IOS_FORCE_OPTIONAL_FILL.
libcxx/test/std/input.output/iostream.format/std.manip/setfill_eof.pass.cpp
Outdated
Show resolved
Hide resolved
- expect the test case to pass except windows in abi version 1.
- add note that the change only affect ABI version 2 - remove "undefining _LIBCXX_IOS_USE_FILL_HELPER to keep the ABI verson 1 behavior" - add init() functions for class _FillHelper and _SentinelValueFill
libcxx/test/std/input.output/iostream.format/std.manip/setfill_wchar_max.pass.cpp
Outdated
Show resolved
Hide resolved
…_wchar_max.pass.cpp Co-authored-by: David Tenty <[email protected]>
….cpp So we can also match aarch64 triples which have four components instead of three when disabling the test, which the case on some buildbots. Follow on to #89305
This test is already XFAIL'd for aarch64, but it seems the XFAIL in the test did not match on these bots because they have a four component triple in their test run. I've update the XFAIL condition with ad15428, which should hopefully resolve the breakage. |
Thanks. @daltenty! |
Hi, we saw a series of weird test failures on our mac stage2 builder after this patch was landed. And after hours of bisecting and we confirmed it was caused by this patch. The error message we saw on our bot:
You can see that the clang emitted some non-readable characters in this unit test. These characters were not emitted before this PR was landed. Our mac stage2 toolchain was linked to the stage1 toolchain's libcxx, that probably explained why the stage1 builder was green on this PR but the test failed on the stage2 build, since the stage1 toolchain used host toolchain's libcxx, which doesn't have this PR. The issue only happens on Mac. It looks like Windows and Linux, are unaffected. Could you take a look please? If it takes a while to fix, could you revert this change and land it later? |
@xingxue-ibm The most likely thing here is that this patch changed the size, alignment, trailing padding or some triviality property of Also, once we figure it out, we should add a test for that. |
An initial thing note is this is this buildbot seems to be explicit building the version 2 ABI, not the stable one: llvm-project/clang/cmake/caches/Fuchsia.cmake Line 129 in 1f6f97e
I'll continue investigation |
@zeroomega we’ve taken a look at the buildbot config and outputs. From what we can see your bots stage 1 use a clang with a custom v2 libc++ from cpid:
And then we have some more cmake flags for stage2 which seem to be set to link the cpid library, not the just built one in the build tree:
Our current theory based on this is your stage2 built clang is loading the cpid shipped libc++ instead of the just-built libc++ from stage 1. That could explain the problems your are seeing, since we changed the offset of the fill character in the layout of basic_ios in the unstable v2 ABI with this patch. Are you able to try running the tests with a custom (You can reach me as |
I don't think your theory is correct, as the CROSS_NATIVE flag is only going to be effective when doing cross compilation. We do have a cross compile mac-arm64 builder that why those flags were there (we should clean them up when doing native host build). In addition, when I was bisecting locally, I omit the CROSS_NATIVE flags. The flag set i used are:
So I don't think the issue is stage2 clang is loading stage1 libcxx. In addition, we always do static linking, I am not sure if |
Thanks for the additional information. That's a little mysterious then, the issue your seeing certainly seems to resemble a layout mismatch on I'll continue to try to reproduce locally with the information you've provided as well. |
I further remove unnecessary cmake flags and it still produce the same error.
The failure can be triggered by
You can get our host clang toolchain from https://chrome-infra-packages.appspot.com/p/fuchsia/third_party/clang/mac-amd64/+/integration and you just need cmake and ninja and xcode to reproduce the build. I will try to see if I can get a more concrete confirmation that the libc++ for the stage2 build is coming from stage1 toolchain build. |
@daltenty Thanks for looking into this. At this point I am 90% confident that the test failure on
So the stage2 clang, when building this binary was linked to the host's libcxx. I will try to manually correct it to the stage2 libcxx to see if the failure disappear. |
No problem, thanks for confirming (my local build is still running). That would definitely explain the issues you are seeing, since the two libraries disagree on the layout. |
….cpp So we can also match aarch64 triples which have four components instead of three when disabling the test, which the case on some buildbots. Follow on to llvm#89305
…vm#89305) `libcxx std::basic_ios` uses `WEOF` to indicate the `fill` value is uninitialized. On some platforms (e.g AIX and zOS in 64-bit mode) `wchar_t` is 4 bytes `unsigned` and `wint_t` is also 4 bytes which means `WEOF` cannot be distinguished from `WCHAR_MAX` by `std::char_traits<wchar_t>::eq_int_type()`, meaning this valid character value cannot be stored on affected platforms (as the implementation triggers reinitialization to `widen(’ ’)`). This patch introduces a new helper class `_FillHelper` uses a boolean variable to indicate whether the fill character has been initialized, which is used by default in libcxx ABI version 2. The patch does not affect ABI version 1 except for targets AIX in 32- and 64-bit and z/OS in 64-bit (so that the layout of the implementation is compatible with the current IBM system provided libc++) This is a continuation of Phabricator patch [D124555](https://reviews.llvm.org/D124555). This patch uses a modified version of the [approach](https://reviews.llvm.org/D124555#3566746) suggested by @ldionne . --------- Co-authored-by: Louis Dionne <[email protected]> Co-authored-by: David Tenty <[email protected]>
….cpp So we can also match aarch64 triples which have four components instead of three when disabling the test, which the case on some buildbots. Follow on to llvm#89305
…9305) Summary: `libcxx std::basic_ios` uses `WEOF` to indicate the `fill` value is uninitialized. On some platforms (e.g AIX and zOS in 64-bit mode) `wchar_t` is 4 bytes `unsigned` and `wint_t` is also 4 bytes which means `WEOF` cannot be distinguished from `WCHAR_MAX` by `std::char_traits<wchar_t>::eq_int_type()`, meaning this valid character value cannot be stored on affected platforms (as the implementation triggers reinitialization to `widen(’ ’)`). This patch introduces a new helper class `_FillHelper` uses a boolean variable to indicate whether the fill character has been initialized, which is used by default in libcxx ABI version 2. The patch does not affect ABI version 1 except for targets AIX in 32- and 64-bit and z/OS in 64-bit (so that the layout of the implementation is compatible with the current IBM system provided libc++) This is a continuation of Phabricator patch [D124555](https://reviews.llvm.org/D124555). This patch uses a modified version of the [approach](https://reviews.llvm.org/D124555#3566746) suggested by @ldionne . --------- Co-authored-by: Louis Dionne <[email protected]> Co-authored-by: David Tenty <[email protected]> Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60250925
….cpp Summary: So we can also match aarch64 triples which have four components instead of three when disabling the test, which the case on some buildbots. Follow on to #89305 Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60250935
This is a small fix to #89305. In the `__init` function of `_FillHelper`, `__fill_val_` was left uninitialized. This worked for the implementation in the PR because we always checked `__set_` before trying to read it, and would initialize if it was unset. However it turns out in earlier versions of the header (at least on AIX which followed this path), we do a read of `__fill_val_` even if `__set_` was false before initializing, to check if it matched the sentinel value, so this causes undesired behaviour and UB.
This is a small fix to llvm/llvm-project#89305. In the `__init` function of `_FillHelper`, `__fill_val_` was left uninitialized. This worked for the implementation in the PR because we always checked `__set_` before trying to read it, and would initialize if it was unset. However it turns out in earlier versions of the header (at least on AIX which followed this path), we do a read of `__fill_val_` even if `__set_` was false before initializing, to check if it matched the sentinel value, so this causes undesired behaviour and UB.
This is a small fix to llvm#89305. In the `__init` function of `_FillHelper`, `__fill_val_` was left uninitialized. This worked for the implementation in the PR because we always checked `__set_` before trying to read it, and would initialize if it was unset. However it turns out in earlier versions of the header (at least on AIX which followed this path), we do a read of `__fill_val_` even if `__set_` was false before initializing, to check if it matched the sentinel value, so this causes undesired behaviour and UB.
This is a small fix to llvm#89305. In the `__init` function of `_FillHelper`, `__fill_val_` was left uninitialized. This worked for the implementation in the PR because we always checked `__set_` before trying to read it, and would initialize if it was unset. However it turns out in earlier versions of the header (at least on AIX which followed this path), we do a read of `__fill_val_` even if `__set_` was false before initializing, to check if it matched the sentinel value, so this causes undesired behaviour and UB.
This is a small fix to llvm#89305. In the `__init` function of `_FillHelper`, `__fill_val_` was left uninitialized. This worked for the implementation in the PR because we always checked `__set_` before trying to read it, and would initialize if it was unset. However it turns out in earlier versions of the header (at least on AIX which followed this path), we do a read of `__fill_val_` even if `__set_` was false before initializing, to check if it matched the sentinel value, so this causes undesired behaviour and UB.
This is a small fix to llvm#89305. In the `__init` function of `_FillHelper`, `__fill_val_` was left uninitialized. This worked for the implementation in the PR because we always checked `__set_` before trying to read it, and would initialize if it was unset. However it turns out in earlier versions of the header (at least on AIX which followed this path), we do a read of `__fill_val_` even if `__set_` was false before initializing, to check if it matched the sentinel value, so this causes undesired behaviour and UB.
libcxx std::basic_ios
usesWEOF
to indicate thefill
value is uninitialized. On some platforms (e.g AIX and zOS in 64-bit mode)wchar_t
is 4 bytesunsigned
andwint_t
is also 4 bytes which meansWEOF
cannot be distinguished fromWCHAR_MAX
bystd::char_traits<wchar_t>::eq_int_type()
, meaning this valid character value cannot be stored on affected platforms (as the implementation triggers reinitialization towiden(’ ’)
).This patch introduces a new helper class
_FillHelper
uses a boolean variable to indicate whether the fill character has been initialized, which is used by default in libcxx ABI version 2. The patch does not affect ABI version 1 except for targets AIX in 32- and 64-bit and z/OS in 64-bit (so that the layout of the implementation is compatible with the current IBM system provided libc++)This is a continuation of Phabricator patch D124555. This patch uses a modified version of the approach suggested by @ldionne .