Skip to content

Commit

Permalink
OIIO::bitcast adjustments
Browse files Browse the repository at this point in the history
Use gcc __builtin_bit_cast when available.

Get rid of specializations -- they are not needed, as verified by
godbolt.

Signed-off-by: Larry Gritz <[email protected]>
  • Loading branch information
lgritz committed Dec 31, 2023
1 parent 934eabc commit 2b754c6
Showing 1 changed file with 15 additions and 42 deletions.
57 changes: 15 additions & 42 deletions src/include/OpenImageIO/fmath.h
Original file line number Diff line number Diff line change
Expand Up @@ -761,15 +761,23 @@ inline OIIO_HOSTDEVICE float sign (float x)
/// equivalently to C++20 std::bit_cast, but it works prior to C++20 and
/// it has the right decorators to work with Cuda.
/// @version 2.4.1
template <typename OUT_TYPE, typename IN_TYPE>
OIIO_FORCEINLINE OIIO_HOSTDEVICE OUT_TYPE bitcast (const IN_TYPE& in) noexcept {
// NOTE: this is the only standards-compliant way of doing this type of casting;
// luckily, the compilers we care about know how to optimize away this idiom.
static_assert(sizeof(IN_TYPE) == sizeof(OUT_TYPE),
template <typename To, typename From>
OIIO_FORCEINLINE OIIO_HOSTDEVICE To bitcast (const From& in) noexcept {
static_assert(sizeof(From) == sizeof(To),
"bit_cast must be between objects of the same size");
OUT_TYPE out;
memcpy ((void *)&out, &in, sizeof(IN_TYPE));
#if (OIIO_GNUC_VERSION >= 110000 || OIIO_CLANG_VERSION >= 100000 ||\
OIIO_APPLE_CLANG_VERSION >= 100000 || OIIO_INTEL_CLANG_VERSION >= 20220000) \
&& !defined(__CUDA_ARCH__)
// Use __builtin_bit_cast for gcc/clang if available
return __builtin_bit_cast(To, in);
#else
// NOTE: this is the only standards-compliant way of doing this type of
// casting; luckily, the compilers we care about know how to optimize away
// this idiom.
To out;
memcpy ((void *)&out, &in, sizeof(From));
return out;
#endif
}

#if OIIO_VERSION_LESS(3, 0, 0)
Expand All @@ -787,41 +795,6 @@ OIIO_FORCEINLINE OIIO_HOSTDEVICE OUT_TYPE bit_cast (const IN_TYPE& in) {
}
#endif

#if defined(__x86_64__) && !defined(__CUDA_ARCH__) && \
(defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER) \
|| OIIO_CLANG_VERSION >= 100000 || OIIO_APPLE_CLANG_VERSION >= 130000)
// Explicit specializations of bitcast<> for the float <-> 32-bit integer and
// double <-> 64-bit integer cases, implemented with the _castf32_u32,
// _castu32_f32, _castf64_u64 and _castu64_f64 intrinsics instead of the
// generic implementation.
//
// For certain compilers we can use Intel CPU intrinsics for some common
// bitcast cases; these might be even more understandable to the compiler and
// generate better code, without it getting confused about the memcpy in the
// general case. We're a bit conservative with the compiler version checks
// here; it may be that some earlier versions support these intrinsics.
//
// The guard above enables this section only when __x86_64__ is defined (it
// does not test for 32-bit x86), __CUDA_ARCH__ is not defined, and the
// compiler is either an Intel compiler or a clang / Apple clang whose version
// macro meets the stated minimum.
// NOTE(review): the thresholds presumably encode clang 10.0.0 and Apple
// clang 13.0.0 -- confirm against the definitions of the OIIO_*_VERSION
// macros.

// float -> uint32_t: return the result of _castf32_u32 on the float.
template<> OIIO_FORCEINLINE uint32_t bitcast<uint32_t, float>(const float& val) noexcept {
return static_cast<uint32_t>(_castf32_u32(val));
}
// float -> int32_t: same intrinsic as above; its result is then converted to
// int32_t with static_cast.
template<> OIIO_FORCEINLINE int32_t bitcast<int32_t, float>(const float& val) noexcept {
return static_cast<int32_t>(_castf32_u32(val));
}
// uint32_t -> float: return the result of _castu32_f32 on the integer.
template<> OIIO_FORCEINLINE float bitcast<float, uint32_t>(const uint32_t& val) noexcept {
return _castu32_f32(val);
}
// int32_t -> float: the signed value is passed straight to _castu32_f32.
// NOTE(review): presumably the intrinsic takes an unsigned 32-bit argument,
// so the value is implicitly converted at the call -- confirm.
template<> OIIO_FORCEINLINE float bitcast<float, int32_t>(const int32_t& val) noexcept {
return _castu32_f32(val);
}
// double -> uint64_t: return the result of _castf64_u64 on the double.
template<> OIIO_FORCEINLINE uint64_t bitcast<uint64_t, double>(const double& val) noexcept {
return static_cast<uint64_t>(_castf64_u64(val));
}
// double -> int64_t: same intrinsic as above; its result is then converted to
// int64_t with static_cast.
template<> OIIO_FORCEINLINE int64_t bitcast<int64_t, double>(const double& val) noexcept {
return static_cast<int64_t>(_castf64_u64(val));
}
// uint64_t -> double: return the result of _castu64_f64 on the integer.
template<> OIIO_FORCEINLINE double bitcast<double, uint64_t>(const uint64_t& val) noexcept {
return _castu64_f64(val);
}
// int64_t -> double: the signed value is passed straight to _castu64_f64.
// NOTE(review): presumably the intrinsic takes an unsigned 64-bit argument,
// so the value is implicitly converted at the call -- confirm.
template<> OIIO_FORCEINLINE double bitcast<double, int64_t>(const int64_t& val) noexcept {
return _castu64_f64(val);
}
#endif


OIIO_FORCEINLINE OIIO_HOSTDEVICE int bitcast_to_int (float x) {
Expand Down

0 comments on commit 2b754c6

Please sign in to comment.