diff --git a/CMakeLists.txt b/CMakeLists.txt index f67e61d4ce6..348815f54cd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,6 +71,10 @@ set(FILAMENT_METAL_HANDLE_ARENA_SIZE_IN_MB "8" CACHE STRING "Size of the Metal handle arena, default 8." ) +set(FILAMENT_BACKEND_DEBUG_FLAG "" CACHE STRING + "A debug flag meant for enabling/disabling backend debugging paths" +) + # Enable exceptions by default in spirv-cross. set(SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS OFF) @@ -548,6 +552,12 @@ if (FILAMENT_SAMPLES_STEREO_TYPE STREQUAL "multiview") set(FILAMENT_ENABLE_MULTIVIEW ON) endif () +# Define backend flag for debug only +if (CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT FILAMENT_BACKEND_DEBUG_FLAG STREQUAL "") + add_definitions(-DFILAMENT_BACKEND_DEBUG_FLAG=${FILAMENT_BACKEND_DEBUG_FLAG}) + unset(FILAMENT_BACKEND_DEBUG_FLAG) +endif() + # ================================================================================================== # Material compilation flags # ================================================================================================== diff --git a/NEW_RELEASE_NOTES.md b/NEW_RELEASE_NOTES.md index 4a1a9c7fa7e..e4728cb677a 100644 --- a/NEW_RELEASE_NOTES.md +++ b/NEW_RELEASE_NOTES.md @@ -7,3 +7,6 @@ for next branch cut* header. appropriate header in [RELEASE_NOTES.md](./RELEASE_NOTES.md). 
## Release notes for next branch cut + +- filagui: Fix regression which broke WebGL +- Add a new Engine::Config setting to control preferred shader language diff --git a/README.md b/README.md index 01948e41c86..f571123d1aa 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ repositories { } dependencies { - implementation 'com.google.android.filament:filament-android:1.51.6' + implementation 'com.google.android.filament:filament-android:1.51.7' } ``` @@ -51,7 +51,7 @@ Here are all the libraries available in the group `com.google.android.filament`: iOS projects can use CocoaPods to install the latest release: ```shell -pod 'Filament', '~> 1.51.6' +pod 'Filament', '~> 1.51.7' ``` ### Snapshots diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 7c6d175e8ec..e09d514005a 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -7,6 +7,13 @@ A new header is inserted each time a *tag* is created. Instead, if you are authoring a PR for the main branch, add your release note to [NEW_RELEASE_NOTES.md](./NEW_RELEASE_NOTES.md). +## v1.51.7 + +- Add new matedit tool +- filagui: Support rendering `GL_TEXTURE_EXTERNAL_OES` textures. +- `setFrameScheduledCallback` now takes a `utils::Invocable`. 
+- engine: Add `isPaused()` + ## v1.51.6 - Add new matedit tool diff --git a/android/filament-android/src/main/cpp/Engine.cpp b/android/filament-android/src/main/cpp/Engine.cpp index 713baa53edb..ef67358079d 100644 --- a/android/filament-android/src/main/cpp/Engine.cpp +++ b/android/filament-android/src/main/cpp/Engine.cpp @@ -406,6 +406,13 @@ Java_com_google_android_filament_Engine_nFlush(JNIEnv*, jclass, engine->flush(); } +extern "C" JNIEXPORT jboolean JNICALL +Java_com_google_android_filament_Engine_nIsPaused(JNIEnv*, jclass, + jlong nativeEngine) { + Engine* engine = (Engine*) nativeEngine; + return (jboolean)engine->isPaused(); +} + extern "C" JNIEXPORT void JNICALL Java_com_google_android_filament_Engine_nSetPaused(JNIEnv*, jclass, jlong nativeEngine, jboolean paused) { diff --git a/android/filament-android/src/main/java/com/google/android/filament/Engine.java b/android/filament-android/src/main/java/com/google/android/filament/Engine.java index a7ec77a71cd..9f8f478009e 100644 --- a/android/filament-android/src/main/java/com/google/android/filament/Engine.java +++ b/android/filament-android/src/main/java/com/google/android/filament/Engine.java @@ -1227,6 +1227,17 @@ public void flush() { nFlush(getNativeObject()); } + /** + * Get paused state of rendering thread. + * + *

Warning: This is an experimental API. + * + * @see #setPaused + */ + public boolean isPaused() { + return nIsPaused(getNativeObject()); + } + /** * Pause or resume the rendering thread. * @@ -1319,6 +1330,7 @@ private static void assertDestroy(boolean success) { private static native void nDestroyEntity(long nativeEngine, int entity); private static native void nFlushAndWait(long nativeEngine); private static native void nFlush(long nativeEngine); + private static native boolean nIsPaused(long nativeEngine); private static native void nSetPaused(long nativeEngine, boolean paused); private static native long nGetTransformManager(long nativeEngine); private static native long nGetLightManager(long nativeEngine); diff --git a/android/gradle.properties b/android/gradle.properties index 87decc65c2c..fd04602ea9c 100644 --- a/android/gradle.properties +++ b/android/gradle.properties @@ -1,5 +1,5 @@ GROUP=com.google.android.filament -VERSION_NAME=1.51.6 +VERSION_NAME=1.51.7 POM_DESCRIPTION=Real-time physically based rendering engine for Android. diff --git a/build.sh b/build.sh index 56562cc1c6c..434a0045fa3 100755 --- a/build.sh +++ b/build.sh @@ -61,6 +61,11 @@ function print_help { echo " -b" echo " Enable Address and Undefined Behavior Sanitizers (asan/ubsan) for debugging." echo " This is only for the desktop build." + echo " -x value" + echo " Define a preprocessor flag FILAMENT_BACKEND_DEBUG_FLAG with [value]. This is useful for" + echo " enabling debug paths in the backend from the build script. For example, make a" + echo " systrace-enabled build without directly changing #defines. Remember to add -f when" + echo " changing this option." 
echo "" echo "Build types:" echo " release" @@ -172,6 +177,8 @@ MATOPT_GRADLE_OPTION="" ASAN_UBSAN_OPTION="" +BACKEND_DEBUG_FLAG_OPTION="" + IOS_BUILD_SIMULATOR=false BUILD_UNIVERSAL_LIBRARIES=false @@ -231,6 +238,7 @@ function build_desktop_target { ${MATDBG_OPTION} \ ${MATOPT_OPTION} \ ${ASAN_UBSAN_OPTION} \ + ${BACKEND_DEBUG_FLAG_OPTION} \ ${architectures} \ ../.. ln -sf "out/cmake-${lc_target}/compile_commands.json" \ @@ -289,6 +297,7 @@ function build_webgl_with_target { -DCMAKE_BUILD_TYPE="$1" \ -DCMAKE_INSTALL_PREFIX="../webgl-${lc_target}/filament" \ -DWEBGL=1 \ + ${BACKEND_DEBUG_FLAG_OPTION} \ ../.. ln -sf "out/cmake-webgl-${lc_target}/compile_commands.json" \ ../../compile_commands.json @@ -363,6 +372,7 @@ function build_android_target { ${MATDBG_OPTION} \ ${MATOPT_OPTION} \ ${VULKAN_ANDROID_OPTION} \ + ${BACKEND_DEBUG_FLAG_OPTION} \ ../.. ln -sf "out/cmake-android-${lc_target}-${arch}/compile_commands.json" \ ../../compile_commands.json @@ -597,6 +607,7 @@ function build_ios_target { -DCMAKE_TOOLCHAIN_FILE=../../third_party/clang/iOS.cmake \ ${MATDBG_OPTION} \ ${MATOPT_OPTION} \ + ${BACKEND_DEBUG_FLAG_OPTION} \ ../.. ln -sf "out/cmake-ios-${lc_target}-${arch}/compile_commands.json" \ ../../compile_commands.json @@ -730,6 +741,13 @@ function validate_build_command { exit 1 fi fi + + # Make sure FILAMENT_BACKEND_DEBUG_FLAG is only meant for debug builds + if [[ "${ISSUE_DEBUG_BUILD}" != "true" ]] && [[ ! 
-z "${BACKEND_DEBUG_FLAG_OPTION}" ]]; then + echo "Error: cannot specify FILAMENT_BACKEND_DEBUG_FLAG in non-debug build" + exit 1 + fi + set -e } @@ -776,7 +794,7 @@ function check_debug_release_build { pushd "$(dirname "$0")" > /dev/null -while getopts ":hacCfgijmp:q:uvslwtedk:b" opt; do +while getopts ":hacCfgijmp:q:uvslwtedk:bx:" opt; do case ${opt} in h) print_help @@ -840,7 +858,7 @@ while getopts ":hacCfgijmp:q:uvslwtedk:b" opt; do echo "Platform must be one of [desktop|android|ios|webgl|all]" echo "" exit 1 - ;; + ;; esac done ;; @@ -918,6 +936,8 @@ while getopts ":hacCfgijmp:q:uvslwtedk:b" opt; do b) ASAN_UBSAN_OPTION="-DFILAMENT_ENABLE_ASAN_UBSAN=ON" echo "Enabled ASAN/UBSAN" ;; + x) BACKEND_DEBUG_FLAG_OPTION="-DFILAMENT_BACKEND_DEBUG_FLAG=${OPTARG}" + ;; \?) echo "Invalid option: -${OPTARG}" >&2 echo "" diff --git a/filament/CMakeLists.txt b/filament/CMakeLists.txt index b978d2da1bd..796acedb7d2 100644 --- a/filament/CMakeLists.txt +++ b/filament/CMakeLists.txt @@ -163,6 +163,7 @@ set(PRIVATE_HDRS src/ResourceList.h src/ShadowMap.h src/ShadowMapManager.h + src/SharedHandle.h src/TypedUniformBuffer.h src/UniformBuffer.h src/components/CameraManager.h @@ -214,6 +215,7 @@ set(PRIVATE_HDRS set(MATERIAL_SRCS src/materials/antiAliasing/fxaa.mat src/materials/antiAliasing/taa.mat + src/materials/blitDepth.mat src/materials/blitLow.mat src/materials/blitArray.mat src/materials/bloom/bloomDownsample.mat diff --git a/filament/backend/include/backend/DriverEnums.h b/filament/backend/include/backend/DriverEnums.h index 411aa65a99d..969632c327a 100644 --- a/filament/backend/include/backend/DriverEnums.h +++ b/filament/backend/include/backend/DriverEnums.h @@ -24,6 +24,7 @@ #include +#include #include #include @@ -1224,7 +1225,7 @@ static_assert(sizeof(StencilState::StencilOperations) == 5u, static_assert(sizeof(StencilState) == 12u, "StencilState size not what was intended"); -using FrameScheduledCallback = void(*)(PresentCallable callable, void* user); +using 
FrameScheduledCallback = utils::Invocable; enum class Workaround : uint16_t { // The EASU pass must split because shader compiler flattens early-exit branch diff --git a/filament/backend/include/backend/Handle.h b/filament/backend/include/backend/Handle.h index 4b63607a1cf..c54e9609cef 100644 --- a/filament/backend/include/backend/Handle.h +++ b/filament/backend/include/backend/Handle.h @@ -75,6 +75,19 @@ class HandleBase { HandleBase(HandleBase const& rhs) noexcept = default; HandleBase& operator=(HandleBase const& rhs) noexcept = default; + HandleBase(HandleBase&& rhs) noexcept + : object(rhs.object) { + rhs.object = nullid; + } + + HandleBase& operator=(HandleBase&& rhs) noexcept { + if (this != &rhs) { + object = rhs.object; + rhs.object = nullid; + } + return *this; + } + private: HandleId object; }; @@ -89,8 +102,10 @@ struct Handle : public HandleBase { Handle() noexcept = default; Handle(Handle const& rhs) noexcept = default; + Handle(Handle&& rhs) noexcept = default; Handle& operator=(Handle const& rhs) noexcept = default; + Handle& operator=(Handle&& rhs) noexcept = default; explicit Handle(HandleId id) noexcept : HandleBase(id) { } diff --git a/filament/backend/include/backend/PresentCallable.h b/filament/backend/include/backend/PresentCallable.h index 4402f22266d..f37d7704b49 100644 --- a/filament/backend/include/backend/PresentCallable.h +++ b/filament/backend/include/backend/PresentCallable.h @@ -48,7 +48,7 @@ namespace filament::backend { * and optional user data: * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * swapChain->setFrameScheduledCallback(myFrameScheduledCallback, nullptr); + * swapChain->setFrameScheduledCallback(nullptr, myFrameScheduledCallback); * if (renderer->beginFrame(swapChain)) { * renderer->render(view); * renderer->endFrame(); @@ -58,8 +58,6 @@ namespace filament::backend { * @remark Only Filament's Metal backend supports PresentCallables and frame callbacks. 
Other * backends ignore the callback (which will never be called) and proceed normally. * - * @remark The SwapChain::FrameScheduledCallback is called on an arbitrary thread. - * * Applications *must* call each PresentCallable they receive. Each PresentCallable represents a * frame that is waiting to be presented. If an application fails to call a PresentCallable, a * memory leak could occur. To "cancel" the presentation of a frame, pass false to the diff --git a/filament/backend/include/backend/platforms/OpenGLPlatform.h b/filament/backend/include/backend/platforms/OpenGLPlatform.h index dec6f47ba74..e00930c98ca 100644 --- a/filament/backend/include/backend/platforms/OpenGLPlatform.h +++ b/filament/backend/include/backend/platforms/OpenGLPlatform.h @@ -140,6 +140,23 @@ class OpenGLPlatform : public Platform { */ virtual uint32_t getDefaultFramebufferObject() noexcept; + /** + * Called by the backend when a frame starts. + * @param steady_clock_ns vsync time point on the monotonic clock + * @param refreshIntervalNs refresh interval in nanosecond + * @param frameId a frame id + */ + virtual void beginFrame( + int64_t monotonic_clock_ns, + int64_t refreshIntervalNs, + uint32_t frameId) noexcept; + + /** + * Called by the backend when a frame ends. + * @param frameId the frame id used in beginFrame + */ + virtual void endFrame( + uint32_t frameId) noexcept; /** * Type of contexts available @@ -191,6 +208,12 @@ class OpenGLPlatform : public Platform { utils::Invocable preContextChange, utils::Invocable postContextChange) noexcept; + /** + * Called by the backend just before calling commit() + * @see commit() + */ + virtual void preCommit() noexcept; + /** * Called by the driver once the current frame finishes drawing. Typically, this should present * the drawSwapChain. This is for example where `eglMakeCurrent()` would be called. 
diff --git a/filament/backend/include/backend/platforms/PlatformEGLAndroid.h b/filament/backend/include/backend/platforms/PlatformEGLAndroid.h index 32f830384d3..d0caeb8c89b 100644 --- a/filament/backend/include/backend/platforms/PlatformEGLAndroid.h +++ b/filament/backend/include/backend/platforms/PlatformEGLAndroid.h @@ -22,6 +22,10 @@ #include #include +#include + +#include + #include #include @@ -58,6 +62,13 @@ class PlatformEGLAndroid : public PlatformEGL { void terminate() noexcept override; + void beginFrame( + int64_t monotonic_clock_ns, + int64_t refreshIntervalNs, + uint32_t frameId) noexcept override; + + void preCommit() noexcept override; + /** * Set the presentation time using `eglPresentationTimeANDROID` * @param presentationTimeInNanosecond @@ -81,6 +92,11 @@ class PlatformEGLAndroid : public PlatformEGL { private: int mOSVersion; ExternalStreamManagerAndroid& mExternalStreamManager; + utils::PerformanceHintManager mPerformanceHintManager; + utils::PerformanceHintManager::Session mPerformanceHintSession; + + using clock = std::chrono::high_resolution_clock; + clock::time_point mStartTimeOfActualWork; }; } // namespace filament::backend diff --git a/filament/backend/include/private/backend/CommandBufferQueue.h b/filament/backend/include/private/backend/CommandBufferQueue.h index 92bf7e1488c..e8ff9aa896c 100644 --- a/filament/backend/include/private/backend/CommandBufferQueue.h +++ b/filament/backend/include/private/backend/CommandBufferQueue.h @@ -82,6 +82,7 @@ class CommandBufferQueue { void requestExit(); // suspend or unsuspend the queue. 
+ bool isPaused() const noexcept; void setPaused(bool paused); bool isExitRequested() const; diff --git a/filament/backend/include/private/backend/DriverAPI.inc b/filament/backend/include/private/backend/DriverAPI.inc index 680a6bb0136..f729a370258 100644 --- a/filament/backend/include/private/backend/DriverAPI.inc +++ b/filament/backend/include/private/backend/DriverAPI.inc @@ -133,12 +133,13 @@ DECL_DRIVER_API_0(tick) DECL_DRIVER_API_N(beginFrame, int64_t, monotonic_clock_ns, + int64_t, refreshIntervalNs, uint32_t, frameId) DECL_DRIVER_API_N(setFrameScheduledCallback, backend::SwapChainHandle, sch, - backend::FrameScheduledCallback, callback, - void*, user) + backend::CallbackHandler*, handler, + backend::FrameScheduledCallback&&, callback) DECL_DRIVER_API_N(setFrameCompletedCallback, backend::SwapChainHandle, sch, @@ -303,6 +304,7 @@ DECL_DRIVER_API_SYNCHRONOUS_0(bool, isProtectedContentSupported) DECL_DRIVER_API_SYNCHRONOUS_N(bool, isStereoSupported, backend::StereoscopicType, stereoscopicType) DECL_DRIVER_API_SYNCHRONOUS_0(bool, isParallelShaderCompileSupported) DECL_DRIVER_API_SYNCHRONOUS_0(bool, isDepthStencilResolveSupported) +DECL_DRIVER_API_SYNCHRONOUS_N(bool, isDepthStencilBlitSupported, backend::TextureFormat, format) DECL_DRIVER_API_SYNCHRONOUS_0(bool, isProtectedTexturesSupported) DECL_DRIVER_API_SYNCHRONOUS_0(uint8_t, getMaxDrawBuffers) DECL_DRIVER_API_SYNCHRONOUS_0(size_t, getMaxUniformBufferSize) diff --git a/filament/backend/src/CommandBufferQueue.cpp b/filament/backend/src/CommandBufferQueue.cpp index b721ce0c50f..62af6499808 100644 --- a/filament/backend/src/CommandBufferQueue.cpp +++ b/filament/backend/src/CommandBufferQueue.cpp @@ -57,6 +57,11 @@ void CommandBufferQueue::requestExit() { mCondition.notify_one(); } +bool CommandBufferQueue::isPaused() const noexcept { + std::lock_guard const lock(mLock); + return mPaused; +} + void CommandBufferQueue::setPaused(bool paused) { std::lock_guard const lock(mLock); if (paused) { diff --git 
a/filament/backend/src/metal/MetalBuffer.h b/filament/backend/src/metal/MetalBuffer.h index 579975d0d6c..8586eeb845a 100644 --- a/filament/backend/src/metal/MetalBuffer.h +++ b/filament/backend/src/metal/MetalBuffer.h @@ -18,6 +18,7 @@ #define TNT_FILAMENT_DRIVER_METALBUFFER_H #include "MetalContext.h" +#include "MetalPlatform.h" #include @@ -33,15 +34,50 @@ namespace filament::backend { class TrackedMetalBuffer { public: + + static constexpr size_t EXCESS_BUFFER_COUNT = 30000; + + enum class Type { + NONE = 0, + GENERIC = 1, + RING = 2, + STAGING = 3, + }; + static constexpr size_t TypeCount = 3; + + static constexpr auto toIndex(Type t) { + assert_invariant(t != Type::NONE); + switch (t) { + case Type::NONE: + case Type::GENERIC: + return 0; + case Type::RING: + return 1; + case Type::STAGING: + return 2; + } + } + TrackedMetalBuffer() noexcept : mBuffer(nil) {} - TrackedMetalBuffer(id buffer) noexcept : mBuffer(buffer) { + TrackedMetalBuffer(nullptr_t) noexcept : mBuffer(nil) {} + TrackedMetalBuffer(id buffer, Type type) : mBuffer(buffer), mType(type) { + assert_invariant(type != Type::NONE); if (buffer) { - aliveBuffers++; + aliveBuffers[toIndex(type)]++; + mType = type; + if (getAliveBuffers() >= EXCESS_BUFFER_COUNT) { + if (platform && platform->hasDebugUpdateStatFunc()) { + platform->debugUpdateStat("filament.metal.excess_buffers_allocated", + TrackedMetalBuffer::getAliveBuffers()); + } + } } } + ~TrackedMetalBuffer() { if (mBuffer) { - aliveBuffers--; + assert_invariant(mType != Type::NONE); + aliveBuffers[toIndex(mType)]--; } } @@ -57,18 +93,31 @@ class TrackedMetalBuffer { id get() const noexcept { return mBuffer; } operator bool() const noexcept { return bool(mBuffer); } - static uint64_t getAliveBuffers() { return aliveBuffers; } + static uint64_t getAliveBuffers() { + uint64_t sum = 0; + for (const auto& v : aliveBuffers) { + sum += v; + } + return sum; + } + + static uint64_t getAliveBuffers(Type type) { + assert_invariant(type != Type::NONE); + 
return aliveBuffers[toIndex(type)]; + } + static void setPlatform(MetalPlatform* p) { platform = p; } private: void swap(TrackedMetalBuffer& other) noexcept { - id temp = mBuffer; - mBuffer = other.mBuffer; - other.mBuffer = temp; + std::swap(mBuffer, other.mBuffer); + std::swap(mType, other.mType); } id mBuffer; + Type mType = Type::NONE; - static std::atomic aliveBuffers; + static MetalPlatform* platform; + static std::array aliveBuffers; }; class MetalBuffer { @@ -171,7 +220,8 @@ class MetalRingBuffer { mBufferOptions(options), mSlotSizeBytes(computeSlotSize(layout)), mSlotCount(slotCount) { - mBuffer = [device newBufferWithLength:mSlotSizeBytes * mSlotCount options:mBufferOptions]; + mBuffer = { [device newBufferWithLength:mSlotSizeBytes * mSlotCount options:mBufferOptions], + TrackedMetalBuffer::Type::RING }; assert_invariant(mBuffer); } @@ -189,9 +239,10 @@ class MetalRingBuffer { // If we already have an aux buffer, it will get freed here, unless it has been retained // by a MTLCommandBuffer. In that case, it will be freed when the command buffer // finishes executing. 
- mAuxBuffer = [mDevice newBufferWithLength:mSlotSizeBytes options:mBufferOptions]; + mAuxBuffer = { [mDevice newBufferWithLength:mSlotSizeBytes options:mBufferOptions], + TrackedMetalBuffer::Type::RING }; assert_invariant(mAuxBuffer); - return {mAuxBuffer.get(), 0}; + return { mAuxBuffer.get(), 0 }; } mCurrentSlot = (mCurrentSlot + 1) % mSlotCount; mOccupiedSlots->fetch_add(1, std::memory_order_relaxed); diff --git a/filament/backend/src/metal/MetalBuffer.mm b/filament/backend/src/metal/MetalBuffer.mm index af46027e20d..ec8a8878e2e 100644 --- a/filament/backend/src/metal/MetalBuffer.mm +++ b/filament/backend/src/metal/MetalBuffer.mm @@ -22,7 +22,8 @@ namespace filament { namespace backend { -std::atomic TrackedMetalBuffer::aliveBuffers = 0; +std::array TrackedMetalBuffer::aliveBuffers = { 0 }; +MetalPlatform* TrackedMetalBuffer::platform = nullptr; MetalBuffer::MetalBuffer(MetalContext& context, BufferObjectBinding bindingType, BufferUsage usage, size_t size, bool forceGpuBuffer) : mBufferSize(size), mContext(context) { @@ -37,7 +38,8 @@ } // Otherwise, we allocate a private GPU buffer. 
- mBuffer = [context.device newBufferWithLength:size options:MTLResourceStorageModePrivate]; + mBuffer = { [context.device newBufferWithLength:size options:MTLResourceStorageModePrivate], + TrackedMetalBuffer::Type::GENERIC }; ASSERT_POSTCONDITION(mBuffer, "Could not allocate Metal buffer of size %zu.", size); } diff --git a/filament/backend/src/metal/MetalBufferPool.mm b/filament/backend/src/metal/MetalBufferPool.mm index 3b75c8e85d4..911bf84e4ac 100644 --- a/filament/backend/src/metal/MetalBufferPool.mm +++ b/filament/backend/src/metal/MetalBufferPool.mm @@ -46,7 +46,7 @@ options:MTLResourceStorageModeShared]; ASSERT_POSTCONDITION(buffer, "Could not allocate Metal staging buffer of size %zu.", numBytes); MetalBufferPoolEntry* stage = new MetalBufferPoolEntry { - .buffer = buffer, + .buffer = { buffer, TrackedMetalBuffer::Type::STAGING }, .capacity = numBytes, .lastAccessed = mCurrentFrame, .referenceCount = 1 diff --git a/filament/backend/src/metal/MetalDriver.mm b/filament/backend/src/metal/MetalDriver.mm index f99a6b63a66..77ee6912dc0 100644 --- a/filament/backend/src/metal/MetalDriver.mm +++ b/filament/backend/src/metal/MetalDriver.mm @@ -105,6 +105,8 @@ driverConfig.disableHandleUseAfterFreeCheck) { mContext->driver = this; + TrackedMetalBuffer::setPlatform(platform); + mContext->device = mPlatform.createDevice(); assert_invariant(mContext->device); @@ -198,6 +200,7 @@ } MetalDriver::~MetalDriver() noexcept { + TrackedMetalBuffer::setPlatform(nullptr); mContext->device = nil; mContext->emptyTexture = nil; CFRelease(mContext->textureCache); @@ -212,19 +215,26 @@ executeTickOps(); } -void MetalDriver::beginFrame(int64_t monotonic_clock_ns, uint32_t frameId) { +void MetalDriver::beginFrame(int64_t monotonic_clock_ns, + int64_t refreshIntervalNs, uint32_t frameId) { #if defined(FILAMENT_METAL_PROFILING) os_signpost_interval_begin(mContext->log, mContext->signpostId, "Frame encoding", "%{public}d", frameId); #endif if (mPlatform.hasDebugUpdateStatFunc()) { 
mPlatform.debugUpdateStat("filament.metal.alive_buffers", TrackedMetalBuffer::getAliveBuffers()); + mPlatform.debugUpdateStat("filament.metal.alive_buffers.generic", + TrackedMetalBuffer::getAliveBuffers(TrackedMetalBuffer::Type::GENERIC)); + mPlatform.debugUpdateStat("filament.metal.alive_buffers.ring", + TrackedMetalBuffer::getAliveBuffers(TrackedMetalBuffer::Type::RING)); + mPlatform.debugUpdateStat("filament.metal.alive_buffers.staging", + TrackedMetalBuffer::getAliveBuffers(TrackedMetalBuffer::Type::STAGING)); } } -void MetalDriver::setFrameScheduledCallback(Handle sch, - FrameScheduledCallback callback, void* user) { +void MetalDriver::setFrameScheduledCallback( + Handle sch, CallbackHandler* handler, FrameScheduledCallback&& callback) { auto* swapChain = handle_cast(sch); - swapChain->setFrameScheduledCallback(callback, user); + swapChain->setFrameScheduledCallback(handler, std::move(callback)); } void MetalDriver::setFrameCompletedCallback(Handle sch, @@ -805,6 +815,10 @@ return false; } +bool MetalDriver::isDepthStencilBlitSupported(TextureFormat format) { + return true; +} + bool MetalDriver::isProtectedTexturesSupported() { return false; } diff --git a/filament/backend/src/metal/MetalHandles.h b/filament/backend/src/metal/MetalHandles.h index fea6f0947f9..c6c65e1f7d6 100644 --- a/filament/backend/src/metal/MetalHandles.h +++ b/filament/backend/src/metal/MetalHandles.h @@ -31,6 +31,8 @@ #include "private/backend/SamplerGroup.h" +#include + #include #include #include @@ -71,9 +73,9 @@ class MetalSwapChain : public HwSwapChain { void releaseDrawable(); - void setFrameScheduledCallback(FrameScheduledCallback callback, void* user); - void setFrameCompletedCallback(CallbackHandler* handler, - CallbackHandler::Callback callback, void* user); + void setFrameScheduledCallback(CallbackHandler* handler, FrameScheduledCallback&& callback); + void setFrameCompletedCallback( + CallbackHandler* handler, CallbackHandler::Callback callback, void* user); // For 
CAMetalLayer-backed SwapChains, presents the drawable or schedules a // FrameScheduledCallback. @@ -110,14 +112,15 @@ class MetalSwapChain : public HwSwapChain { MetalExternalImage externalImage; SwapChainType type; - // These two fields store a callback and user data to notify the client that a frame is ready - // for presentation. - // If frameScheduledCallback is nullptr, then the Metal backend automatically calls - // presentDrawable when the frame is committed. - // Otherwise, the Metal backend will not automatically present the frame. Instead, clients bear - // the responsibility of presenting the frame by calling the PresentCallable object. - FrameScheduledCallback frameScheduledCallback = nullptr; - void* frameScheduledUserData = nullptr; + // These fields store a callback to notify the client that a frame is ready for presentation. If + // !frameScheduled.callback, then the Metal backend automatically calls presentDrawable when the + // frame is committed. Otherwise, the Metal backend will not automatically present the frame. + // Instead, clients bear the responsibility of presenting the frame by calling the + // PresentCallable object. 
+ struct { + CallbackHandler* handler = nullptr; + FrameScheduledCallback callback = {}; + } frameScheduled; struct { CallbackHandler* handler = nullptr; diff --git a/filament/backend/src/metal/MetalHandles.mm b/filament/backend/src/metal/MetalHandles.mm index 0d9976211da..e8ab879729a 100644 --- a/filament/backend/src/metal/MetalHandles.mm +++ b/filament/backend/src/metal/MetalHandles.mm @@ -221,9 +221,10 @@ static inline MTLTextureUsage getMetalTextureUsage(TextureUsage usage) { depthStencilTexture = [context.device newTextureWithDescriptor:descriptor]; } -void MetalSwapChain::setFrameScheduledCallback(FrameScheduledCallback callback, void* user) { - frameScheduledCallback = callback; - frameScheduledUserData = user; +void MetalSwapChain::setFrameScheduledCallback( + CallbackHandler* handler, FrameScheduledCallback&& callback) { + frameScheduled.handler = handler; + frameScheduled.callback = std::move(callback); } void MetalSwapChain::setFrameCompletedCallback(CallbackHandler* handler, @@ -238,7 +239,7 @@ static inline MTLTextureUsage getMetalTextureUsage(TextureUsage usage) { scheduleFrameCompletedCallback(); } if (drawable) { - if (frameScheduledCallback) { + if (frameScheduled.callback) { scheduleFrameScheduledCallback(); } else { [getPendingCommandBuffer(&context) presentDrawable:drawable]; @@ -296,21 +297,38 @@ void presentDrawable(bool presentFrame, void* user) { } void MetalSwapChain::scheduleFrameScheduledCallback() { - if (!frameScheduledCallback) { + if (!frameScheduled.callback) { return; } assert_invariant(drawable); - // Destroy this by calling maybePresentAndDestroyAsync() later. - auto* presentData = PresentDrawableData::create(drawable, context.driver); + struct Callback { + Callback(FrameScheduledCallback&& callback, id drawable, + MetalDriver* driver) + : f(std::move(callback)), data(PresentDrawableData::create(drawable, driver)) {} + FrameScheduledCallback f; + // PresentDrawableData* is destroyed by maybePresentAndDestroyAsync() later. 
+ std::unique_ptr data; + static void func(void* user) { + auto* const c = reinterpret_cast(user); + PresentDrawableData* presentDrawableData = c->data.release(); + PresentCallable presentCallable(presentDrawable, presentDrawableData); + c->f(presentCallable); + delete c; + } + }; - FrameScheduledCallback userCallback = frameScheduledCallback; - void* userData = frameScheduledUserData; + // This callback pointer will be captured by the block. Even if the scheduled handler is never + // called, the unique_ptr will still ensure we don't leak memory. + __block auto callback = + std::make_unique(std::move(frameScheduled.callback), drawable, context.driver); + backend::CallbackHandler* handler = frameScheduled.handler; + MetalDriver* driver = context.driver; [getPendingCommandBuffer(&context) addScheduledHandler:^(id cb) { - PresentCallable callable(presentDrawable, static_cast(presentData)); - userCallback(callable, userData); + Callback* user = callback.release(); + driver->scheduleCallback(handler, user, &Callback::func); }]; } diff --git a/filament/backend/src/noop/NoopDriver.cpp b/filament/backend/src/noop/NoopDriver.cpp index 7a150a3e74a..9984bed9a68 100644 --- a/filament/backend/src/noop/NoopDriver.cpp +++ b/filament/backend/src/noop/NoopDriver.cpp @@ -49,11 +49,12 @@ void NoopDriver::terminate() { void NoopDriver::tick(int) { } -void NoopDriver::beginFrame(int64_t monotonic_clock_ns, uint32_t frameId) { +void NoopDriver::beginFrame(int64_t monotonic_clock_ns, + int64_t refreshIntervalNs, uint32_t frameId) { } void NoopDriver::setFrameScheduledCallback(Handle sch, - FrameScheduledCallback callback, void* user) { + CallbackHandler* handler, FrameScheduledCallback&& callback) { } @@ -193,6 +194,10 @@ bool NoopDriver::isDepthStencilResolveSupported() { return true; } +bool NoopDriver::isDepthStencilBlitSupported(TextureFormat format) { + return true; +} + bool NoopDriver::isProtectedTexturesSupported() { return true; } diff --git 
a/filament/backend/src/opengl/GLTexture.h b/filament/backend/src/opengl/GLTexture.h index 91aadfc36af..5e9460e17a5 100644 --- a/filament/backend/src/opengl/GLTexture.h +++ b/filament/backend/src/opengl/GLTexture.h @@ -30,7 +30,7 @@ namespace filament::backend { struct GLTexture : public HwTexture { using HwTexture::HwTexture; struct GL { - GL() noexcept : imported(false), sidecarSamples(1), reserved(0) {} + GL() noexcept : imported(false), sidecarSamples(1), reserved1(0) {} GLuint id = 0; // texture or renderbuffer id GLenum target = 0; GLenum internalFormat = 0; @@ -40,10 +40,10 @@ struct GLTexture : public HwTexture { GLfloat anisotropy = 1.0; int8_t baseLevel = 127; int8_t maxLevel = -1; - uint8_t targetIndex = 0; // optimization: index corresponding to target + uint8_t reserved0 = 0; bool imported : 1; uint8_t sidecarSamples : 4; - uint8_t reserved : 3; + uint8_t reserved1 : 3; } gl; OpenGLPlatform::ExternalTexture* externalTexture = nullptr; diff --git a/filament/backend/src/opengl/OpenGLContext.cpp b/filament/backend/src/opengl/OpenGLContext.cpp index 5355e6b1b95..f87c5004061 100644 --- a/filament/backend/src/opengl/OpenGLContext.cpp +++ b/filament/backend/src/opengl/OpenGLContext.cpp @@ -881,19 +881,28 @@ void OpenGLContext::pixelStore(GLenum pname, GLint param) noexcept { } } -void OpenGLContext::unbindTexture(GLenum target, GLuint texture_id) noexcept { +void OpenGLContext::unbindTexture( + UTILS_UNUSED_IN_RELEASE GLenum target, GLuint texture_id) noexcept { // unbind this texture from all the units it might be bound to // no need unbind the texture from FBOs because we're not tracking that state (and there is // no need to). 
- const size_t index = getIndexForTextureTarget(target); UTILS_NOUNROLL for (GLuint unit = 0; unit < MAX_TEXTURE_UNIT_COUNT; unit++) { - if (state.textures.units[unit].targets[index].texture_id == texture_id) { - bindTexture(unit, target, (GLuint)0, index); + if (state.textures.units[unit].id == texture_id) { + // if this texture is bound, it should be at the same target + assert_invariant(state.textures.units[unit].target == target); + unbindTextureUnit(unit); } } } +void OpenGLContext::unbindTextureUnit(GLuint unit) noexcept { + update_state(state.textures.units[unit].id, 0u, [&]() { + activeTexture(unit); + glBindTexture(state.textures.units[unit].target, 0u); + }); +} + void OpenGLContext::unbindSampler(GLuint sampler) noexcept { // unbind this sampler from all the units it might be bound to UTILS_NOUNROLL // clang generates >800B of code!!! diff --git a/filament/backend/src/opengl/OpenGLContext.h b/filament/backend/src/opengl/OpenGLContext.h index e6edc0c2ae9..ff01b76d648 100644 --- a/filament/backend/src/opengl/OpenGLContext.h +++ b/filament/backend/src/opengl/OpenGLContext.h @@ -137,7 +137,6 @@ class OpenGLContext final : public TimerQueryFactoryInterface { #endif } - constexpr static inline size_t getIndexForTextureTarget(GLuint target) noexcept; constexpr inline size_t getIndexForCap(GLenum cap) noexcept; constexpr static inline size_t getIndexForBufferTarget(GLenum target) noexcept; @@ -149,10 +148,10 @@ class OpenGLContext final : public TimerQueryFactoryInterface { void pixelStore(GLenum, GLint) noexcept; inline void activeTexture(GLuint unit) noexcept; - inline void bindTexture(GLuint unit, GLuint target, GLuint texId, size_t targetIndex) noexcept; inline void bindTexture(GLuint unit, GLuint target, GLuint texId) noexcept; void unbindTexture(GLenum target, GLuint id) noexcept; + void unbindTextureUnit(GLuint unit) noexcept; inline void bindVertexArray(RenderPrimitive const* p) noexcept; inline void bindSampler(GLuint unit, GLuint sampler) noexcept; 
void unbindSampler(GLuint sampler) noexcept; @@ -322,8 +321,14 @@ class OpenGLContext final : public TimerQueryFactoryInterface { // function to handle state changes we don't control void updateTexImage(GLenum target, GLuint id) noexcept { - const size_t index = getIndexForTextureTarget(target); - state.textures.units[state.textures.active].targets[index].texture_id = id; + assert_invariant(target == GL_TEXTURE_EXTERNAL_OES); + // if another target is bound to this texture unit, unbind that texture + if (UTILS_UNLIKELY(state.textures.units[state.textures.active].target != target)) { + glBindTexture(state.textures.units[state.textures.active].target, 0); + state.textures.units[state.textures.active].target = GL_TEXTURE_EXTERNAL_OES; + } + // the texture is already bound to `target`, we just update our internal state + state.textures.units[state.textures.active].id = id; } void resetProgram() noexcept { state.program.use = 0; } @@ -426,9 +431,8 @@ class OpenGLContext final : public TimerQueryFactoryInterface { GLuint active = 0; // zero-based struct { GLuint sampler = 0; - struct { - GLuint texture_id = 0; - } targets[7]; // this must match getIndexForTextureTarget() + GLuint target = 0; + GLuint id = 0; } units[MAX_TEXTURE_UNIT_COUNT]; } textures; @@ -598,31 +602,10 @@ class OpenGLContext final : public TimerQueryFactoryInterface { } void setDefaultState() noexcept; - - static constexpr const size_t TEXTURE_TARGET_COUNT = - sizeof(state.textures.units[0].targets) / sizeof(state.textures.units[0].targets[0]); - }; // ------------------------------------------------------------------------------------------------ -constexpr size_t OpenGLContext::getIndexForTextureTarget(GLuint target) noexcept { - // this must match state.textures[].targets[] - switch (target) { - case GL_TEXTURE_2D: return 0; - case GL_TEXTURE_2D_ARRAY: return 1; - case GL_TEXTURE_CUBE_MAP: return 2; -#if defined(BACKEND_OPENGL_LEVEL_GLES31) - case GL_TEXTURE_2D_MULTISAMPLE: return 3; -#endif - case 
GL_TEXTURE_EXTERNAL_OES: return 4; - case GL_TEXTURE_3D: return 5; - case GL_TEXTURE_CUBE_MAP_ARRAY: return 6; - default: - return 0; - } -} - constexpr size_t OpenGLContext::getIndexForCap(GLenum cap) noexcept { //NOLINT size_t index = 0; switch (cap) { @@ -770,19 +753,17 @@ void OpenGLContext::bindBufferRange(GLenum target, GLuint index, GLuint buffer, #endif } -void OpenGLContext::bindTexture(GLuint unit, GLuint target, GLuint texId, size_t targetIndex) noexcept { - assert_invariant(targetIndex == getIndexForTextureTarget(target)); - assert_invariant(targetIndex < TEXTURE_TARGET_COUNT); - update_state(state.textures.units[unit].targets[targetIndex].texture_id, texId, [&]() { +void OpenGLContext::bindTexture(GLuint unit, GLuint target, GLuint texId) noexcept { + update_state(state.textures.units[unit].target, target, [&]() { + activeTexture(unit); + glBindTexture(state.textures.units[unit].target, 0); + }); + update_state(state.textures.units[unit].id, texId, [&]() { activeTexture(unit); glBindTexture(target, texId); }, target == GL_TEXTURE_EXTERNAL_OES); } -void OpenGLContext::bindTexture(GLuint unit, GLuint target, GLuint texId) noexcept { - bindTexture(unit, target, texId, getIndexForTextureTarget(target)); -} - void OpenGLContext::useProgram(GLuint program) noexcept { update_state(state.program.use, program, [&]() { glUseProgram(program); diff --git a/filament/backend/src/opengl/OpenGLDriver.cpp b/filament/backend/src/opengl/OpenGLDriver.cpp index 82723bcb3e0..ddf699b75f1 100644 --- a/filament/backend/src/opengl/OpenGLDriver.cpp +++ b/filament/backend/src/opengl/OpenGLDriver.cpp @@ -287,7 +287,7 @@ void OpenGLDriver::bindSampler(GLuint unit, GLuint sampler) noexcept { void OpenGLDriver::bindTexture(GLuint unit, GLTexture const* t) noexcept { assert_invariant(t != nullptr); - mContext.bindTexture(unit, t->gl.target, t->gl.id, t->gl.targetIndex); + mContext.bindTexture(unit, t->gl.target, t->gl.id); } bool OpenGLDriver::useProgram(OpenGLProgram* p) noexcept { 
@@ -749,7 +749,6 @@ void OpenGLDriver::createTextureR(Handle th, SamplerType target, uint if (t->externalTexture) { t->gl.target = t->externalTexture->target; t->gl.id = t->externalTexture->id; - t->gl.targetIndex = (uint8_t)OpenGLContext::getIndexForTextureTarget(t->gl.target); // internalFormat actually depends on the external image, but it doesn't matter // because it's not used anywhere for anything important. t->gl.internalFormat = internalFormat; @@ -761,30 +760,23 @@ void OpenGLDriver::createTextureR(Handle th, SamplerType target, uint t->gl.internalFormat = internalFormat; - // We DO NOT update targetIndex at function exit to take advantage of the fact that - // getIndexForTextureTarget() is constexpr -- so all of this disappears at compile time. switch (target) { case SamplerType::SAMPLER_EXTERNAL: // we can't be here -- doesn't matter what we do case SamplerType::SAMPLER_2D: t->gl.target = GL_TEXTURE_2D; - t->gl.targetIndex = OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_2D); break; case SamplerType::SAMPLER_3D: t->gl.target = GL_TEXTURE_3D; - t->gl.targetIndex = OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_3D); break; case SamplerType::SAMPLER_2D_ARRAY: t->gl.target = GL_TEXTURE_2D_ARRAY; - t->gl.targetIndex = OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_2D_ARRAY); break; case SamplerType::SAMPLER_CUBEMAP: t->gl.target = GL_TEXTURE_CUBE_MAP; - t->gl.targetIndex = OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_CUBE_MAP); break; case SamplerType::SAMPLER_CUBEMAP_ARRAY: t->gl.target = GL_TEXTURE_CUBE_MAP_ARRAY; - t->gl.targetIndex = OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_CUBE_MAP_ARRAY); break; } @@ -795,8 +787,6 @@ void OpenGLDriver::createTextureR(Handle th, SamplerType target, uint if (gl.features.multisample_texture) { // multi-sample texture on GL 3.2 / GLES 3.1 and above t->gl.target = GL_TEXTURE_2D_MULTISAMPLE; - t->gl.targetIndex = (uint8_t) - OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_2D_MULTISAMPLE); } else 
{ // Turn off multi-sampling for that texture. It's just not supported. } @@ -855,32 +845,24 @@ void OpenGLDriver::importTextureR(Handle th, intptr_t id, t->gl.internalFormat = getInternalFormat(format); assert_invariant(t->gl.internalFormat); - // We DO NOT update targetIndex at function exit to take advantage of the fact that - // getIndexForTextureTarget() is constexpr -- so all of this disappears at compile time. switch (target) { case SamplerType::SAMPLER_EXTERNAL: t->gl.target = GL_TEXTURE_EXTERNAL_OES; - t->gl.targetIndex = OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_EXTERNAL_OES); break; case SamplerType::SAMPLER_2D: t->gl.target = GL_TEXTURE_2D; - t->gl.targetIndex = OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_2D); break; case SamplerType::SAMPLER_3D: t->gl.target = GL_TEXTURE_3D; - t->gl.targetIndex = OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_3D); break; case SamplerType::SAMPLER_2D_ARRAY: t->gl.target = GL_TEXTURE_2D_ARRAY; - t->gl.targetIndex = OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_2D_ARRAY); break; case SamplerType::SAMPLER_CUBEMAP: t->gl.target = GL_TEXTURE_CUBE_MAP; - t->gl.targetIndex = OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_CUBE_MAP); break; case SamplerType::SAMPLER_CUBEMAP_ARRAY: t->gl.target = GL_TEXTURE_CUBE_MAP_ARRAY; - t->gl.targetIndex = OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_CUBE_MAP_ARRAY); break; } @@ -891,7 +873,6 @@ void OpenGLDriver::importTextureR(Handle th, intptr_t id, if (gl.features.multisample_texture) { // multi-sample texture on GL 3.2 / GLES 3.1 and above t->gl.target = GL_TEXTURE_2D_MULTISAMPLE; - t->gl.targetIndex = OpenGLContext::getIndexForTextureTarget(GL_TEXTURE_2D_MULTISAMPLE); } else { // Turn off multi-sampling for that texture. It's just not supported. 
} @@ -1781,7 +1762,6 @@ void OpenGLDriver::updateStreams(DriverApi* driver) { // the target and id can be reset each time t->gl.target = t->externalTexture->target; t->gl.id = t->externalTexture->id; - t->gl.targetIndex = (uint8_t)OpenGLContext::getIndexForTextureTarget(t->gl.target); bindTexture(OpenGLContext::DUMMY_TEXTURE_BINDING, t); } } @@ -2043,6 +2023,10 @@ bool OpenGLDriver::isDepthStencilResolveSupported() { return true; } +bool OpenGLDriver::isDepthStencilBlitSupported(TextureFormat format) { + return true; +} + bool OpenGLDriver::isProtectedTexturesSupported() { return getContext().ext.EXT_protected_textures; } @@ -2667,7 +2651,6 @@ void OpenGLDriver::setExternalImage(Handle th, void* image) { // the target and id can be reset each time t->gl.target = t->externalTexture->target; t->gl.id = t->externalTexture->id; - t->gl.targetIndex = (uint8_t)OpenGLContext::getIndexForTextureTarget(t->gl.target); bindTexture(OpenGLContext::DUMMY_TEXTURE_BINDING, t); } } @@ -3406,10 +3389,12 @@ void OpenGLDriver::tick(int) { void OpenGLDriver::beginFrame( UTILS_UNUSED int64_t monotonic_clock_ns, + UTILS_UNUSED int64_t refreshIntervalNs, UTILS_UNUSED uint32_t frameId) { DEBUG_MARKER() auto& gl = mContext; insertEventMarker("beginFrame"); + mPlatform.beginFrame(monotonic_clock_ns, refreshIntervalNs, frameId); if (UTILS_UNLIKELY(!mTexturesWithStreamsAttached.empty())) { OpenGLPlatform& platform = mPlatform; for (GLTexture const* t : mTexturesWithStreamsAttached) { @@ -3418,7 +3403,7 @@ void OpenGLDriver::beginFrame( assert_invariant(t->hwStream->stream); platform.updateTexImage(t->hwStream->stream, &static_cast(t->hwStream)->user_thread.timestamp); // NOLINT(cppcoreguidelines-pro-type-static-cast-downcast) - // NOTE: We assume that updateTexImage() binds the texture on our behalf + // NOTE: We assume that OpenGLPlatform::updateTexImage() binds the texture on our behalf gl.updateTexImage(GL_TEXTURE_EXTERNAL_OES, t->gl.id); } } @@ -3426,7 +3411,7 @@ void 
OpenGLDriver::beginFrame( } void OpenGLDriver::setFrameScheduledCallback(Handle sch, - FrameScheduledCallback callback, void* user) { + CallbackHandler* handler, FrameScheduledCallback&& callback) { DEBUG_MARKER() } @@ -3457,6 +3442,7 @@ void OpenGLDriver::endFrame(UTILS_UNUSED uint32_t frameId) { #endif //SYSTRACE_NAME("glFinish"); //glFinish(); + mPlatform.endFrame(frameId); insertEventMarker("endFrame"); } diff --git a/filament/backend/src/opengl/OpenGLPlatform.cpp b/filament/backend/src/opengl/OpenGLPlatform.cpp index 17359e85f7b..94c3b991126 100644 --- a/filament/backend/src/opengl/OpenGLPlatform.cpp +++ b/filament/backend/src/opengl/OpenGLPlatform.cpp @@ -55,6 +55,16 @@ uint32_t OpenGLPlatform::getDefaultFramebufferObject() noexcept { return 0; } +void OpenGLPlatform::beginFrame(int64_t monotonic_clock_ns, int64_t refreshIntervalNs, + uint32_t frameId) noexcept { +} + +void OpenGLPlatform::endFrame(uint32_t frameId) noexcept { +} + +void OpenGLPlatform::preCommit() noexcept { +} + OpenGLPlatform::ContextType OpenGLPlatform::getCurrentContextType() const noexcept { return ContextType::UNPROTECTED; } diff --git a/filament/backend/src/opengl/platforms/PlatformEGLAndroid.cpp b/filament/backend/src/opengl/platforms/PlatformEGLAndroid.cpp index addc5e02719..94d602a0ba8 100644 --- a/filament/backend/src/opengl/platforms/PlatformEGLAndroid.cpp +++ b/filament/backend/src/opengl/platforms/PlatformEGLAndroid.cpp @@ -25,6 +25,8 @@ #include #include +#include + #include #include #include @@ -36,8 +38,11 @@ #include +#include #include +#include + #include #include #include @@ -112,8 +117,38 @@ void PlatformEGLAndroid::terminate() noexcept { PlatformEGL::terminate(); } +void PlatformEGLAndroid::beginFrame( + int64_t monotonic_clock_ns, + int64_t refreshIntervalNs, + uint32_t frameId) noexcept { + if (mPerformanceHintSession.isValid()) { + if (refreshIntervalNs <= 0) { + // we're not provided with a target time, assume 16.67ms + refreshIntervalNs = 16'666'667; + } + 
mStartTimeOfActualWork = clock::time_point(std::chrono::nanoseconds(monotonic_clock_ns)); + mPerformanceHintSession.updateTargetWorkDuration(refreshIntervalNs); + } + PlatformEGL::beginFrame(monotonic_clock_ns, refreshIntervalNs, frameId); +} + +void backend::PlatformEGLAndroid::preCommit() noexcept { + if (mPerformanceHintSession.isValid()) { + auto const actualWorkDuration = std::chrono::duration_cast( + clock::now() - mStartTimeOfActualWork); + mPerformanceHintSession.reportActualWorkDuration(actualWorkDuration.count()); + } + PlatformEGL::preCommit(); +} + Driver* PlatformEGLAndroid::createDriver(void* sharedContext, const Platform::DriverConfig& driverConfig) noexcept { + + // the refresh rate default value doesn't matter, we change it later + int32_t const tid = gettid(); + mPerformanceHintSession = PerformanceHintManager::Session{ + mPerformanceHintManager, &tid, 1, 16'666'667 }; + Driver* driver = PlatformEGL::createDriver(sharedContext, driverConfig); auto extensions = GLUtils::split(eglQueryString(mEGLDisplay, EGL_EXTENSIONS)); diff --git a/filament/backend/src/vulkan/VulkanBlitter.cpp b/filament/backend/src/vulkan/VulkanBlitter.cpp index c4316cf1ec8..967337c769b 100644 --- a/filament/backend/src/vulkan/VulkanBlitter.cpp +++ b/filament/backend/src/vulkan/VulkanBlitter.cpp @@ -26,21 +26,16 @@ #include -#include "generated/vkshaders/vkshaders.h" - using namespace bluevk; using namespace utils; namespace filament::backend { -using ImgUtil = VulkanImageUtility; - namespace { inline void blitFast(const VkCommandBuffer cmdbuffer, VkImageAspectFlags aspect, VkFilter filter, VulkanAttachment src, VulkanAttachment dst, const VkOffset3D srcRect[2], const VkOffset3D dstRect[2]) { - if constexpr (FVK_ENABLED(FVK_DEBUG_BLITTER)) { utils::slog.d << "Fast blit from=" << src.texture->getVkImage() << ",level=" << (int) src.level << " layout=" << src.getLayout() @@ -48,21 +43,8 @@ inline void blitFast(const VkCommandBuffer cmdbuffer, VkImageAspectFlags aspect, << " 
layout=" << dst.getLayout() << utils::io::endl; } - const VkImageSubresourceRange srcRange = { - .aspectMask = aspect, - .baseMipLevel = src.level, - .levelCount = 1, - .baseArrayLayer = src.layer, - .layerCount = 1, - }; - - const VkImageSubresourceRange dstRange = { - .aspectMask = aspect, - .baseMipLevel = dst.level, - .levelCount = 1, - .baseArrayLayer = dst.layer, - .layerCount = 1, - }; + VkImageSubresourceRange const srcRange = src.getSubresourceRange(); + VkImageSubresourceRange const dstRange = dst.getSubresourceRange(); VulkanLayout oldSrcLayout = src.getLayout(); VulkanLayout oldDstLayout = dst.getLayout(); @@ -77,15 +59,15 @@ inline void blitFast(const VkCommandBuffer cmdbuffer, VkImageAspectFlags aspect, .dstOffsets = { dstRect[0], dstRect[1] }, }}; vkCmdBlitImage(cmdbuffer, - src.getImage(), ImgUtil::getVkLayout(VulkanLayout::TRANSFER_SRC), - dst.getImage(), ImgUtil::getVkLayout(VulkanLayout::TRANSFER_DST), + src.getImage(), imgutil::getVkLayout(VulkanLayout::TRANSFER_SRC), + dst.getImage(), imgutil::getVkLayout(VulkanLayout::TRANSFER_DST), 1, blitRegions, filter); if (oldSrcLayout == VulkanLayout::UNDEFINED) { - oldSrcLayout = ImgUtil::getDefaultLayout(src.texture->usage); + oldSrcLayout = imgutil::getDefaultLayout(src.texture->usage); } if (oldDstLayout == VulkanLayout::UNDEFINED) { - oldDstLayout = ImgUtil::getDefaultLayout(dst.texture->usage); + oldDstLayout = imgutil::getDefaultLayout(dst.texture->usage); } src.texture->transitionLayout(cmdbuffer, srcRange, oldSrcLayout); dst.texture->transitionLayout(cmdbuffer, dstRange, oldDstLayout); @@ -93,7 +75,6 @@ inline void blitFast(const VkCommandBuffer cmdbuffer, VkImageAspectFlags aspect, inline void resolveFast(const VkCommandBuffer cmdbuffer, VkImageAspectFlags aspect, VulkanAttachment src, VulkanAttachment dst) { - if constexpr (FVK_ENABLED(FVK_DEBUG_BLITTER)) { utils::slog.d << "Fast blit from=" << src.texture->getVkImage() << ",level=" << (int) src.level << " layout=" << src.getLayout() @@ 
-101,21 +82,8 @@ inline void resolveFast(const VkCommandBuffer cmdbuffer, VkImageAspectFlags aspe << " layout=" << dst.getLayout() << utils::io::endl; } - const VkImageSubresourceRange srcRange = { - .aspectMask = aspect, - .baseMipLevel = src.level, - .levelCount = 1, - .baseArrayLayer = src.layer, - .layerCount = 1, - }; - - const VkImageSubresourceRange dstRange = { - .aspectMask = aspect, - .baseMipLevel = dst.level, - .levelCount = 1, - .baseArrayLayer = dst.layer, - .layerCount = 1, - }; + VkImageSubresourceRange const srcRange = src.getSubresourceRange(); + VkImageSubresourceRange const dstRange = dst.getSubresourceRange(); VulkanLayout oldSrcLayout = src.getLayout(); VulkanLayout oldDstLayout = dst.getLayout(); @@ -133,15 +101,15 @@ inline void resolveFast(const VkCommandBuffer cmdbuffer, VkImageAspectFlags aspe .extent = { src.getExtent2D().width, src.getExtent2D().height, 1 }, }}; vkCmdResolveImage(cmdbuffer, - src.getImage(), ImgUtil::getVkLayout(VulkanLayout::TRANSFER_SRC), - dst.getImage(), ImgUtil::getVkLayout(VulkanLayout::TRANSFER_DST), + src.getImage(), imgutil::getVkLayout(VulkanLayout::TRANSFER_SRC), + dst.getImage(), imgutil::getVkLayout(VulkanLayout::TRANSFER_DST), 1, resolveRegions); if (oldSrcLayout == VulkanLayout::UNDEFINED) { - oldSrcLayout = ImgUtil::getDefaultLayout(src.texture->usage); + oldSrcLayout = imgutil::getDefaultLayout(src.texture->usage); } if (oldDstLayout == VulkanLayout::UNDEFINED) { - oldDstLayout = ImgUtil::getDefaultLayout(dst.texture->usage); + oldDstLayout = imgutil::getDefaultLayout(dst.texture->usage); } src.texture->transitionLayout(cmdbuffer, srcRange, oldSrcLayout); dst.texture->transitionLayout(cmdbuffer, dstRange, oldDstLayout); diff --git a/filament/backend/src/vulkan/VulkanBuffer.cpp b/filament/backend/src/vulkan/VulkanBuffer.cpp index 1b5f59eafd6..bc09c7f5213 100644 --- a/filament/backend/src/vulkan/VulkanBuffer.cpp +++ b/filament/backend/src/vulkan/VulkanBuffer.cpp @@ -27,8 +27,8 @@ 
VulkanBuffer::VulkanBuffer(VmaAllocator allocator, VulkanStagePool& stagePool, VkBufferUsageFlags usage, uint32_t numBytes) : mAllocator(allocator), mStagePool(stagePool), - mUsage(usage) { - + mUsage(usage), + mUpdatedBytes(0) { // for now make sure that only 1 bit is set in usage // (because loadFromCpu() assumes that somewhat) assert_invariant(usage && !(usage & (usage - 1))); @@ -49,7 +49,7 @@ VulkanBuffer::~VulkanBuffer() { } void VulkanBuffer::loadFromCpu(VkCommandBuffer cmdbuf, const void* cpuData, uint32_t byteOffset, - uint32_t numBytes) const { + uint32_t numBytes) { assert_invariant(byteOffset == 0); VulkanStage const* stage = mStagePool.acquireStage(numBytes); void* mapped; @@ -58,15 +58,47 @@ void VulkanBuffer::loadFromCpu(VkCommandBuffer cmdbuf, const void* cpuData, uint vmaUnmapMemory(mAllocator, stage->memory); vmaFlushAllocation(mAllocator, stage->memory, byteOffset, numBytes); + // If there was a previous update, then we need to make sure the following write is properly + // synced with the previous read. 
+ if (mUpdatedBytes > 0) { + VkAccessFlags srcAccess = 0; + VkPipelineStageFlags srcStage = 0; + if (mUsage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) { + srcAccess = VK_ACCESS_SHADER_READ_BIT; + srcStage = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + } else if (mUsage & VK_BUFFER_USAGE_VERTEX_BUFFER_BIT) { + srcAccess = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; + srcStage = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + } else if (mUsage & VK_BUFFER_USAGE_INDEX_BUFFER_BIT) { + srcAccess = VK_ACCESS_INDEX_READ_BIT; + srcStage = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + } + + VkBufferMemoryBarrier barrier{ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = srcAccess, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = mGpuBuffer, + .size = mUpdatedBytes, + }; + vkCmdPipelineBarrier(cmdbuf, srcStage, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 1, + &barrier, 0, nullptr); + } + VkBufferCopy region{ .size = numBytes }; vkCmdCopyBuffer(cmdbuf, stage->buffer, mGpuBuffer, 1, ®ion); + mUpdatedBytes = numBytes; + // Firstly, ensure that the copy finishes before the next draw call. // Secondly, in case the user decides to upload another chunk (without ever using the first one) // we need to ensure that this upload completes first (hence // dstStageMask=VK_PIPELINE_STAGE_TRANSFER_BIT). 
VkAccessFlags dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; VkPipelineStageFlags dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + if (mUsage & VK_BUFFER_USAGE_VERTEX_BUFFER_BIT) { dstAccessMask |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; dstStageMask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; @@ -75,26 +107,24 @@ void VulkanBuffer::loadFromCpu(VkCommandBuffer cmdbuf, const void* cpuData, uint dstStageMask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; } else if (mUsage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) { dstAccessMask |= VK_ACCESS_UNIFORM_READ_BIT; - // NOTE: ideally dstStageMask would include VERTEX_SHADER_BIT | FRAGMENT_SHADER_BIT, but - // this seems to be insufficient on Mali devices. To work around this we are using a more - // aggressive ALL_GRAPHICS_BIT barrier. - dstStageMask |= VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT; + dstStageMask |= + (VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); } else if (mUsage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) { // TODO: implement me } VkBufferMemoryBarrier barrier{ - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = dstAccessMask, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = mGpuBuffer, - .size = VK_WHOLE_SIZE, + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = dstAccessMask, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = mGpuBuffer, + .size = VK_WHOLE_SIZE, }; vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TRANSFER_BIT, dstStageMask, 0, 0, nullptr, 1, - &barrier, 0, nullptr); + &barrier, 0, nullptr); } } // namespace filament::backend diff --git a/filament/backend/src/vulkan/VulkanBuffer.h b/filament/backend/src/vulkan/VulkanBuffer.h index db24edf3943..e496f930dbe 100644 --- a/filament/backend/src/vulkan/VulkanBuffer.h +++ 
b/filament/backend/src/vulkan/VulkanBuffer.h @@ -30,7 +30,7 @@ class VulkanBuffer { uint32_t numBytes); ~VulkanBuffer(); void loadFromCpu(VkCommandBuffer cmdbuf, const void* cpuData, uint32_t byteOffset, - uint32_t numBytes) const; + uint32_t numBytes); VkBuffer getGpuBuffer() const { return mGpuBuffer; } @@ -42,6 +42,7 @@ class VulkanBuffer { VmaAllocation mGpuMemory = VK_NULL_HANDLE; VkBuffer mGpuBuffer = VK_NULL_HANDLE; VkBufferUsageFlags mUsage = {}; + uint32_t mUpdatedBytes = 0; }; } // namespace filament::backend diff --git a/filament/backend/src/vulkan/VulkanConstants.h b/filament/backend/src/vulkan/VulkanConstants.h index b4974950ef5..f870302e723 100644 --- a/filament/backend/src/vulkan/VulkanConstants.h +++ b/filament/backend/src/vulkan/VulkanConstants.h @@ -47,7 +47,7 @@ // granualarity of a renderpass. You can enable this along with FVK_DEBUG_DEBUG_UTILS to take // advantage of vkCmdBegin/EndDebugUtilsLabelEXT. You can also just enable this with // FVK_DEBUG_PRINT_GROUP_MARKERS to print the current marker to stdout. 
-#define FVK_DEBUG_GROUP_MARKERS 0x00000002 +#define FVK_DEBUG_GROUP_MARKERS 0x00000002 #define FVK_DEBUG_TEXTURE 0x00000004 #define FVK_DEBUG_LAYOUT_TRANSITION 0x00000008 @@ -75,18 +75,14 @@ #define FVK_DEBUG_PERFORMANCE \ FVK_DEBUG_SYSTRACE -#define FVK_DEBUG_CORRECTNESS \ - FVK_DEBUG_VALIDATION | \ - FVK_DEBUG_SHADER_MODULE | \ - FVK_DEBUG_TEXTURE | \ - FVK_DEBUG_LAYOUT_TRANSITION - -#define FVK_DEBUG_RENDER_PASSES \ - FVK_DEBUG_GROUP_MARKERS | \ - FVK_DEBUG_PRINT_GROUP_MARKERS +#if defined(FILAMENT_BACKEND_DEBUG_FLAG) +#define FVK_DEBUG_FORWARDED_FLAG (FILAMENT_BACKEND_DEBUG_FLAG & FVK_DEBUG_EVERYTHING) +#else +#define FVK_DEBUG_FORWARDED_FLAG 0 +#endif #ifndef NDEBUG -#define FVK_DEBUG_FLAGS (FVK_DEBUG_PERFORMANCE) +#define FVK_DEBUG_FLAGS (FVK_DEBUG_PERFORMANCE | FVK_DEBUG_FORWARDED_FLAG) #else #define FVK_DEBUG_FLAGS 0 #endif @@ -112,7 +108,7 @@ static_assert(FVK_ENABLED(FVK_DEBUG_VALIDATION)); // end dependcy checks // Shorthand for combination of enabled debug flags -#if FVK_ENABLED(FVK_DEBUG_DEBUG_UTILS) || FVK_ENABLED(FVK_DEBUG_TEXTURE) +#if FVK_ENABLED(FVK_DEBUG_DEBUG_UTILS) && FVK_ENABLED(FVK_DEBUG_TEXTURE) #define FVK_ENABLED_DEBUG_SAMPLER_NAME 1 #else #define FVK_ENABLED_DEBUG_SAMPLER_NAME 0 diff --git a/filament/backend/src/vulkan/VulkanContext.cpp b/filament/backend/src/vulkan/VulkanContext.cpp index 1fc2e46c77c..c590977af2a 100644 --- a/filament/backend/src/vulkan/VulkanContext.cpp +++ b/filament/backend/src/vulkan/VulkanContext.cpp @@ -57,15 +57,19 @@ VkExtent2D VulkanAttachment::getExtent2D() const { return { std::max(1u, texture->width >> level), std::max(1u, texture->height >> level) }; } -VkImageView VulkanAttachment::getImageView(VkImageAspectFlags aspect) { +VkImageView VulkanAttachment::getImageView() { assert_invariant(texture); - return texture->getAttachmentView(getSubresourceRange(aspect)); + return texture->getAttachmentView(getSubresourceRange()); } -VkImageSubresourceRange VulkanAttachment::getSubresourceRange(VkImageAspectFlags 
aspect) const { +bool VulkanAttachment::isDepth() const { + return texture->getImageAspect() & VK_IMAGE_ASPECT_DEPTH_BIT; +} + +VkImageSubresourceRange VulkanAttachment::getSubresourceRange() const { assert_invariant(texture); return { - .aspectMask = aspect, + .aspectMask = texture->getImageAspect(), .baseMipLevel = uint32_t(level), .levelCount = 1, .baseArrayLayer = uint32_t(layer), diff --git a/filament/backend/src/vulkan/VulkanContext.h b/filament/backend/src/vulkan/VulkanContext.h index 7c60f576b35..995175ce943 100644 --- a/filament/backend/src/vulkan/VulkanContext.h +++ b/filament/backend/src/vulkan/VulkanContext.h @@ -44,13 +44,15 @@ struct VulkanAttachment { VulkanTexture* texture = nullptr; uint8_t level = 0; uint16_t layer = 0; + + bool isDepth() const; VkImage getImage() const; VkFormat getFormat() const; VulkanLayout getLayout() const; VkExtent2D getExtent2D() const; - VkImageView getImageView(VkImageAspectFlags aspect); + VkImageView getImageView(); // TODO: maybe embed aspect into the attachment or texture itself. - VkImageSubresourceRange getSubresourceRange(VkImageAspectFlags aspect) const; + VkImageSubresourceRange getSubresourceRange() const; }; class VulkanTimestamps { @@ -101,8 +103,12 @@ struct VulkanContext { return (uint32_t) VK_MAX_MEMORY_TYPES; } - inline VkFormatList const& getAttachmentDepthFormats() const { - return mDepthFormats; + inline VkFormatList const& getAttachmentDepthStencilFormats() const { + return mDepthStencilFormats; + } + + inline VkFormatList const& getBlittableDepthStencilFormats() const { + return mBlittableDepthStencilFormats; } inline VkPhysicalDeviceLimits const& getPhysicalDeviceLimits() const noexcept { @@ -131,7 +137,8 @@ struct VulkanContext { bool mDebugMarkersSupported = false; bool mDebugUtilsSupported = false; - VkFormatList mDepthFormats; + VkFormatList mDepthStencilFormats; + VkFormatList mBlittableDepthStencilFormats; // For convenience so that VulkanPlatform can initialize the private fields. 
friend class VulkanPlatform; diff --git a/filament/backend/src/vulkan/VulkanDriver.cpp b/filament/backend/src/vulkan/VulkanDriver.cpp index e1cd3d7cf74..49e6f581c9f 100644 --- a/filament/backend/src/vulkan/VulkanDriver.cpp +++ b/filament/backend/src/vulkan/VulkanDriver.cpp @@ -22,7 +22,6 @@ #include "VulkanCommands.h" #include "VulkanDriverFactory.h" #include "VulkanHandles.h" -#include "VulkanImageUtility.h" #include "VulkanMemory.h" #include "VulkanTexture.h" @@ -206,8 +205,6 @@ void DebugUtils::setName(VkObjectType type, uint64_t handle, char const* name) { } #endif // FVK_EANBLED(FVK_DEBUG_DEBUG_UTILS) -using ImgUtil = VulkanImageUtility; - Dispatcher VulkanDriver::getDispatcher() const noexcept { return ConcreteDispatcher::make(); } @@ -391,12 +388,13 @@ void VulkanDriver::collectGarbage() { FVK_SYSTRACE_END(); } -void VulkanDriver::beginFrame(int64_t monotonic_clock_ns, uint32_t frameId) { +void VulkanDriver::beginFrame(int64_t monotonic_clock_ns, + int64_t refreshIntervalNs, uint32_t frameId) { // Do nothing. 
} void VulkanDriver::setFrameScheduledCallback(Handle sch, - FrameScheduledCallback callback, void* user) { + CallbackHandler* handler, FrameScheduledCallback&& callback) { } void VulkanDriver::setFrameCompletedCallback(Handle sch, @@ -920,6 +918,11 @@ bool VulkanDriver::isDepthStencilResolveSupported() { return false; } +bool VulkanDriver::isDepthStencilBlitSupported(TextureFormat format) { + auto const& formats = mContext.getBlittableDepthStencilFormats(); + return std::find(formats.begin(), formats.end(), getVkFormat(format)) != formats.end(); +} + bool VulkanDriver::isProtectedTexturesSupported() { return false; } @@ -1233,12 +1236,7 @@ void VulkanDriver::beginRenderPass(Handle rth, const RenderPassP } } - VulkanLayout const currentDepthLayout = depth.getLayout(); - VulkanLayout const renderPassDepthLayout = VulkanLayout::DEPTH_ATTACHMENT; - // We need to keep the final layout as an attachment because the implicit transition does not - // have any barrier guarrantees, meaning that if we want to sample from the output in the next - // pass, then we'd have a race-condition/validation error. - VulkanLayout const finalDepthLayout = renderPassDepthLayout; + VulkanLayout currentDepthLayout = depth.getLayout(); TargetBufferFlags clearVal = params.flags.clear; TargetBufferFlags discardEndVal = params.flags.discardEnd; @@ -1247,16 +1245,20 @@ void VulkanDriver::beginRenderPass(Handle rth, const RenderPassP discardEndVal &= ~TargetBufferFlags::DEPTH; clearVal &= ~TargetBufferFlags::DEPTH; } - auto const attachmentSubresourceRange = depth.getSubresourceRange(VK_IMAGE_ASPECT_DEPTH_BIT); - depth.texture->setLayout(attachmentSubresourceRange, VulkanLayout::DEPTH_ATTACHMENT); + // If the depth attachment texture was previously sampled, then we need to manually + // transition it to an attachment. This is necessary to also set up a barrier between the + // previous read and the potentially coming write. 
+ if (currentDepthLayout == VulkanLayout::DEPTH_SAMPLER) { + depth.texture->transitionLayout(cmdbuffer, depth.getSubresourceRange(), + VulkanLayout::DEPTH_ATTACHMENT); + currentDepthLayout = VulkanLayout::DEPTH_ATTACHMENT; + } } // Create the VkRenderPass or fetch it from cache. VulkanFboCache::RenderPassKey rpkey = { .initialColorLayoutMask = 0, .initialDepthLayout = currentDepthLayout, - .renderPassDepthLayout = renderPassDepthLayout, - .finalDepthLayout = finalDepthLayout, .depthFormat = depth.getFormat(), .clear = clearVal, .discardStart = discardStart, @@ -1273,9 +1275,9 @@ void VulkanDriver::beginRenderPass(Handle rth, const RenderPassP rpkey.needsResolveMask |= (1 << i); } if (info.texture->getPrimaryImageLayout() != VulkanLayout::COLOR_ATTACHMENT) { - ((VulkanTexture*) info.texture)->transitionLayout(cmdbuffer, - info.getSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT), - VulkanLayout::COLOR_ATTACHMENT); + ((VulkanTexture*) info.texture) + ->transitionLayout(cmdbuffer, info.getSubresourceRange(), + VulkanLayout::COLOR_ATTACHMENT); } } else { rpkey.colorFormat[i] = VK_FORMAT_UNDEFINED; @@ -1293,27 +1295,42 @@ void VulkanDriver::beginRenderPass(Handle rth, const RenderPassP .layers = 1, .samples = rpkey.samples, }; + auto& renderPassAttachments = mRenderPassFboInfo.attachments; for (int i = 0; i < MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT; i++) { if (!rt->getColor(i).texture) { fbkey.color[i] = VK_NULL_HANDLE; fbkey.resolve[i] = VK_NULL_HANDLE; } else if (fbkey.samples == 1) { - fbkey.color[i] = rt->getColor(i).getImageView(VK_IMAGE_ASPECT_COLOR_BIT); + auto& colorAttachment = rt->getColor(i); + renderPassAttachments.insert(colorAttachment); + fbkey.color[i] = colorAttachment.getImageView(); fbkey.resolve[i] = VK_NULL_HANDLE; assert_invariant(fbkey.color[i]); } else { - fbkey.color[i] = rt->getMsaaColor(i).getImageView(VK_IMAGE_ASPECT_COLOR_BIT); - VulkanTexture* texture = (VulkanTexture*) rt->getColor(i).texture; + auto& msaaColorAttachment = rt->getMsaaColor(i); + 
renderPassAttachments.insert(msaaColorAttachment); + + auto& colorAttachment = rt->getColor(i); + fbkey.color[i] = msaaColorAttachment.getImageView(); + + VulkanTexture* texture = colorAttachment.texture; if (texture->samples == 1) { - fbkey.resolve[i] = rt->getColor(i).getImageView(VK_IMAGE_ASPECT_COLOR_BIT); + mRenderPassFboInfo.hasColorResolve = true; + + renderPassAttachments.insert(colorAttachment); + fbkey.resolve[i] = colorAttachment.getImageView(); assert_invariant(fbkey.resolve[i]); } assert_invariant(fbkey.color[i]); } } if (depth.texture) { - fbkey.depth = depth.getImageView(VK_IMAGE_ASPECT_DEPTH_BIT); + fbkey.depth = depth.getImageView(); assert_invariant(fbkey.depth); + renderPassAttachments.insert(depth); + + UTILS_UNUSED_IN_RELEASE bool const depthDiscardEnd = + any(rpkey.discardEnd & TargetBufferFlags::DEPTH); // Vulkan 1.1 does not support multisampled depth resolve, so let's check here // and assert if this is requested. (c.f. isAutoDepthResolveSupported) @@ -1322,7 +1339,7 @@ void VulkanDriver::beginRenderPass(Handle rth, const RenderPassP // - If the RT is MS then all SS attachments are auto resolved if not discarded. assert_invariant(!(rt->getSamples() > 1 && rt->getDepth().texture->samples == 1 && - !any(rpkey.discardEnd & TargetBufferFlags::DEPTH))); + !depthDiscardEnd)); } VkFramebuffer vkfb = mFramebufferCache.getFramebuffer(fbkey); @@ -1335,16 +1352,10 @@ void VulkanDriver::beginRenderPass(Handle rth, const RenderPassP } #endif - // The current command buffer now owns a reference to the render target and its attachments. - // Note that we must acquire parent textures, not sidecars. + // The current command buffer now has references to the render target and its attachments. 
commands.acquire(rt); - if (depth.texture) { - commands.acquire((VulkanTexture*) depth.texture); - } - for (int i = 0; i < MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT; i++) { - if (rt->getColor(i).texture) { - commands.acquire(rt->getColor(i).texture); - } + for (auto const& attachment: renderPassAttachments) { + commands.acquire(attachment.texture); } // Populate the structures required for vkCmdBeginRenderPass. @@ -1430,27 +1441,51 @@ void VulkanDriver::endRenderPass(int) { // issue several of them when considering MRT. This would be very complex to set up and would // require more state tracking, so we've chosen to use a memory barrier for simplicity and // correctness. - - // NOTE: ideally dstStageMask would merely be VERTEX_SHADER_BIT | FRAGMENT_SHADER_BIT, but this - // seems to be insufficient on Mali devices. To work around this we are adding a more aggressive - // TOP_OF_PIPE barrier. if (!rt->isSwapChain()) { - VkMemoryBarrier barrier{ - .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - }; - VkPipelineStageFlags srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - if (rt->hasDepth()) { - barrier.srcAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - srcStageMask |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + for (auto const& attachment: mRenderPassFboInfo.attachments) { + bool const isDepth = attachment.isDepth(); + VkPipelineStageFlags srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + + // This is a workaround around a validation issue (might not be an actual driver issue). 
+ if (mRenderPassFboInfo.hasColorResolve && !isDepth) { + srcStageMask = VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT; + } + + VkPipelineStageFlags dstStageMask = + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + VkAccessFlags srcAccess = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + VkAccessFlags dstAccess = VK_ACCESS_SHADER_READ_BIT; + VulkanLayout layout = VulkanFboCache::FINAL_COLOR_ATTACHMENT_LAYOUT; + if (isDepth) { + srcAccess = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + dstAccess = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; + srcStageMask = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + dstStageMask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; + layout = VulkanFboCache::FINAL_DEPTH_ATTACHMENT_LAYOUT; + } + + auto const vkLayout = imgutil::getVkLayout(layout); + auto const& range = attachment.getSubresourceRange(); + VkImageMemoryBarrier barrier = { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = srcAccess, + .dstAccessMask = dstAccess, + .oldLayout = vkLayout, + .newLayout = vkLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = attachment.getImage(), + .subresourceRange = range, + }; + + attachment.texture->setLayout(range, layout); + vkCmdPipelineBarrier(cmdbuffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, + 1, &barrier); } - vkCmdPipelineBarrier(cmdbuffer, srcStageMask, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT | // <== For Mali - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, - 0, 1, &barrier, 0, nullptr, 0, nullptr); } + mRenderPassFboInfo.clear(); mDescriptorSetManager.clearState(); mCurrentRenderPass.renderTarget = nullptr; mCurrentRenderPass.renderPass = VK_NULL_HANDLE; @@ -1806,7 +1841,7 @@ void VulkanDriver::bindPipeline(PipelineState pipelineState) { // This fallback path is very flaky because the dummy texture might not have // matching characteristics. (e.g. 
if the missing texture is a 3D texture) if (UTILS_UNLIKELY(texture->getPrimaryImageLayout() == VulkanLayout::UNDEFINED)) { -#if FVK_ENABLED(FVK_DEBUG_TEXTURE) +#if FVK_ENABLED(FVK_DEBUG_TEXTURE) && FVK_ENABLED_DEBUG_SAMPLER_NAME utils::slog.w << "Uninitialized texture bound to '" << bindingToName[binding] << "'"; utils::slog.w << " in material '" << program->name.c_str() << "'"; utils::slog.w << " at binding point " << +binding << utils::io::endl; @@ -1814,15 +1849,11 @@ void VulkanDriver::bindPipeline(PipelineState pipelineState) { texture = mEmptyTexture; } + VkSampler const vksampler = mSamplerCache.getSampler(boundSampler->s); #if FVK_ENABLED_DEBUG_SAMPLER_NAME VulkanDriver::DebugUtils::setName(VK_OBJECT_TYPE_SAMPLER, reinterpret_cast(vksampler), bindingToName[binding].c_str()); - VulkanDriver::DebugUtils::setName(VK_OBJECT_TYPE_SAMPLER, - reinterpret_cast(samplerInfo.sampler), bindingToName[binding].c_str()); #endif - - VkSampler const vksampler = mSamplerCache.getSampler(boundSampler->s); - mDescriptorSetManager.updateSampler({}, binding, texture, vksampler); } diff --git a/filament/backend/src/vulkan/VulkanDriver.h b/filament/backend/src/vulkan/VulkanDriver.h index fca5c45c5be..8de0ae4a26e 100644 --- a/filament/backend/src/vulkan/VulkanDriver.h +++ b/filament/backend/src/vulkan/VulkanDriver.h @@ -42,6 +42,25 @@ namespace filament::backend { class VulkanPlatform; struct VulkanSamplerGroup; +// The maximum number of attachments for any renderpass (color + resolve + depth) +constexpr uint8_t MAX_RENDERTARGET_ATTACHMENT_TEXTURES = + MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT * 2 + 1; + +// We need to store information about a render pass to enable better barriers at the end of a +// renderpass. 
+struct RenderPassFboBundle { + using AttachmentArray = + CappedArray; + + AttachmentArray attachments; + bool hasColorResolve = false; + + void clear() { + attachments.clear(); + hasColorResolve = false; + } +}; + class VulkanDriver final : public DriverBase { public: static Driver* create(VulkanPlatform* platform, VulkanContext const& context, @@ -141,6 +160,8 @@ class VulkanDriver final : public DriverBase { VulkanDescriptorSetManager::GetPipelineLayoutFunction mGetPipelineFunction; + RenderPassFboBundle mRenderPassFboInfo; + bool const mIsSRGBSwapChainSupported; }; diff --git a/filament/backend/src/vulkan/VulkanFboCache.cpp b/filament/backend/src/vulkan/VulkanFboCache.cpp index f4b222b1545..c754fe57e70 100644 --- a/filament/backend/src/vulkan/VulkanFboCache.cpp +++ b/filament/backend/src/vulkan/VulkanFboCache.cpp @@ -29,14 +29,10 @@ using namespace bluevk; namespace filament::backend { -using ImgUtil = VulkanImageUtility; - bool VulkanFboCache::RenderPassEq::operator()(const RenderPassKey& k1, const RenderPassKey& k2) const { if (k1.initialColorLayoutMask != k2.initialColorLayoutMask) return false; if (k1.initialDepthLayout != k2.initialDepthLayout) return false; - if (k1.renderPassDepthLayout != k2.renderPassDepthLayout) return false; - if (k1.finalDepthLayout != k2.finalDepthLayout) return false; for (int i = 0; i < MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT; i++) { if (k1.colorFormat[i] != k2.colorFormat[i]) return false; } @@ -197,7 +193,7 @@ VkRenderPass VulkanFboCache::getRenderPass(RenderPassKey config) noexcept { if (config.colorFormat[i] == VK_FORMAT_UNDEFINED) { continue; } - const VkImageLayout subpassLayout = ImgUtil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT); + const VkImageLayout subpassLayout = imgutil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT); uint32_t index; if (!hasSubpasses) { @@ -243,9 +239,9 @@ VkRenderPass VulkanFboCache::getRenderPass(RenderPassKey config) noexcept { .stencilLoadOp = kDontCare, .stencilStoreOp = kDisableStore, 
.initialLayout = ((!discard && config.initialColorLayoutMask & (1 << i)) || clear) - ? ImgUtil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT) - : ImgUtil::getVkLayout(VulkanLayout::UNDEFINED), - .finalLayout = ImgUtil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT), + ? imgutil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT) + : imgutil::getVkLayout(VulkanLayout::UNDEFINED), + .finalLayout = imgutil::getVkLayout(FINAL_COLOR_ATTACHMENT_LAYOUT), }; } @@ -272,7 +268,7 @@ VkRenderPass VulkanFboCache::getRenderPass(RenderPassKey config) noexcept { pResolveAttachment->attachment = attachmentIndex; pResolveAttachment->layout - = ImgUtil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT_RESOLVE); + = imgutil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT_RESOLVE); ++pResolveAttachment; attachments[attachmentIndex++] = { @@ -282,8 +278,8 @@ VkRenderPass VulkanFboCache::getRenderPass(RenderPassKey config) noexcept { .storeOp = kEnableStore, .stencilLoadOp = kDontCare, .stencilStoreOp = kDisableStore, - .initialLayout = ImgUtil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT), - .finalLayout = ImgUtil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT), + .initialLayout = imgutil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT), + .finalLayout = imgutil::getVkLayout(FINAL_COLOR_ATTACHMENT_LAYOUT), }; } @@ -292,7 +288,7 @@ VkRenderPass VulkanFboCache::getRenderPass(RenderPassKey config) noexcept { const bool clear = any(config.clear & TargetBufferFlags::DEPTH); const bool discardStart = any(config.discardStart & TargetBufferFlags::DEPTH); const bool discardEnd = any(config.discardEnd & TargetBufferFlags::DEPTH); - depthAttachmentRef.layout = ImgUtil::getVkLayout(config.renderPassDepthLayout); + depthAttachmentRef.layout = imgutil::getVkLayout(VulkanLayout::DEPTH_ATTACHMENT); depthAttachmentRef.attachment = attachmentIndex; attachments[attachmentIndex++] = { .format = config.depthFormat, @@ -301,8 +297,8 @@ VkRenderPass VulkanFboCache::getRenderPass(RenderPassKey config) noexcept { .storeOp = discardEnd ? 
kDisableStore : kEnableStore, .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, - .initialLayout = ImgUtil::getVkLayout(config.initialDepthLayout), - .finalLayout = ImgUtil::getVkLayout(config.finalDepthLayout), + .initialLayout = imgutil::getVkLayout(config.initialDepthLayout), + .finalLayout = imgutil::getVkLayout(FINAL_DEPTH_ATTACHMENT_LAYOUT), }; } renderPassInfo.attachmentCount = attachmentIndex; diff --git a/filament/backend/src/vulkan/VulkanFboCache.h b/filament/backend/src/vulkan/VulkanFboCache.h index cefdae2995d..2f4b1a9101c 100644 --- a/filament/backend/src/vulkan/VulkanFboCache.h +++ b/filament/backend/src/vulkan/VulkanFboCache.h @@ -35,24 +35,25 @@ namespace filament::backend { // class VulkanFboCache { public: + constexpr static VulkanLayout FINAL_COLOR_ATTACHMENT_LAYOUT = VulkanLayout::COLOR_ATTACHMENT; + constexpr static VulkanLayout FINAL_RESOLVE_ATTACHMENT_LAYOUT = VulkanLayout::COLOR_ATTACHMENT; + constexpr static VulkanLayout FINAL_DEPTH_ATTACHMENT_LAYOUT = VulkanLayout::DEPTH_ATTACHMENT; + // RenderPassKey is a small POD representing the immutable state that is used to construct // a VkRenderPass. It is hashed and used as a lookup key. - // TODO: This struct can be reduced in size by using a subset of formats instead of VkFormat - // and removing the "finalDepthLayout" field. struct alignas(8) RenderPassKey { // For each target, we need to know three image layouts: the layout BEFORE the pass, the // layout DURING the pass, and the layout AFTER the pass. Here are the rules: // - For depth, we explicitly specify all three layouts. // - Color targets have their initial image layout specified with a bitmask. // - For each color target, the pre-existing layout is either UNDEFINED (0) or GENERAL (1). - // - The render pass and final images layout for color buffers is always GENERAL. + // - The render pass and final images layout for color buffers is always + // VulkanLayout::COLOR_ATTACHMENT. 
uint8_t initialColorLayoutMask; // Note that if VulkanLayout grows beyond 16, we'd need to up this. - VulkanLayout initialDepthLayout : 4; - VulkanLayout renderPassDepthLayout : 4; - VulkanLayout finalDepthLayout : 4; - uint8_t padding0 : 4; + VulkanLayout initialDepthLayout : 8; + uint8_t padding0; uint8_t padding1; VkFormat colorFormat[MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT]; // 32 bytes @@ -63,7 +64,7 @@ class VulkanFboCache { uint8_t samples; // 1 byte uint8_t needsResolveMask; // 1 byte uint8_t subpassMask; // 1 byte - bool padding2; // 1 byte + uint8_t padding2; // 1 byte }; struct RenderPassVal { VkRenderPass handle; diff --git a/filament/backend/src/vulkan/VulkanHandles.cpp b/filament/backend/src/vulkan/VulkanHandles.cpp index 9d615ce5151..0dd11ce8d1d 100644 --- a/filament/backend/src/vulkan/VulkanHandles.cpp +++ b/filament/backend/src/vulkan/VulkanHandles.cpp @@ -65,21 +65,6 @@ static constexpr Bitmask fromStageFlags(ShaderStageFlags2 flags, uint8_t binding return ret; } -UsageFlags getUsageFlags(uint16_t binding, ShaderStageFlags flags, UsageFlags src) { - // NOTE: if you modify this function, you also need to modify getShaderStageFlags. 
- assert_invariant(binding < MAX_SAMPLER_COUNT); - if (any(flags & ShaderStageFlags::VERTEX)) { - src.set(binding); - } - if (any(flags & ShaderStageFlags::FRAGMENT)) { - src.set(MAX_SAMPLER_COUNT + binding); - } - // TODO: add support for compute by extending SHADER_MODULE_COUNT and ensuring UsageFlags - // has 186 bits (MAX_SAMPLER_COUNT * 3) - // assert_invariant(!any(flags & ~(ShaderStageFlags::VERTEX | ShaderStageFlags::FRAGMENT))); - return src; -} - constexpr decltype(VulkanProgram::MAX_SHADER_MODULES) MAX_SHADER_MODULES = VulkanProgram::MAX_SHADER_MODULES; @@ -236,7 +221,6 @@ VulkanProgram::VulkanProgram(VkDevice device, Program const& builder) noexcept auto& groupInfo = builder.getSamplerGroupInfo(); auto& bindingToSamplerIndex = mInfo->bindingToSamplerIndex; auto& bindings = mInfo->bindings; - auto& usage = mInfo->usage; for (uint8_t groupInd = 0; groupInd < Program::SAMPLER_BINDING_COUNT; groupInd++) { auto const& group = groupInfo[groupInd]; auto const& samplers = group.samplers; @@ -245,7 +229,6 @@ VulkanProgram::VulkanProgram(VkDevice device, Program const& builder) noexcept bindingToSamplerIndex[binding] = (groupInd << 8) | (0xff & i); assert_invariant(bindings.find(binding) == bindings.end()); bindings.insert(binding); - usage = getUsageFlags(binding, group.stageFlags, usage); #if FVK_ENABLED_DEBUG_SAMPLER_NAME bindingToName[binding] = samplers[i].name.c_str(); @@ -302,13 +285,13 @@ VulkanRenderTarget::VulkanRenderTarget(VkDevice device, VkPhysicalDevice physica // Constrain the sample count according to both kinds of sample count masks obtained from // VkPhysicalDeviceProperties. This is consistent with the VulkanTexture constructor. 
- const auto& limits = context.getPhysicalDeviceLimits(); + auto const& limits = context.getPhysicalDeviceLimits(); mSamples = samples = reduceSampleCount(samples, limits.framebufferDepthSampleCounts & limits.framebufferColorSampleCounts); // Create sidecar MSAA textures for color attachments if they don't already exist. for (int index = 0; index < MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT; index++) { - const VulkanAttachment& spec = color[index]; + VulkanAttachment const& spec = color[index]; VulkanTexture* texture = (VulkanTexture*) spec.texture; if (texture && texture->samples == 1) { auto msTexture = texture->getSidecar(); @@ -371,19 +354,19 @@ VkExtent2D VulkanRenderTarget::getExtent() const { return {width, height}; } -VulkanAttachment VulkanRenderTarget::getColor(int target) const { +VulkanAttachment& VulkanRenderTarget::getColor(int target) { return mColor[target]; } -VulkanAttachment VulkanRenderTarget::getMsaaColor(int target) const { +VulkanAttachment& VulkanRenderTarget::getMsaaColor(int target) { return mMsaaAttachments[target]; } -VulkanAttachment VulkanRenderTarget::getDepth() const { +VulkanAttachment& VulkanRenderTarget::getDepth() { return mDepth; } -VulkanAttachment VulkanRenderTarget::getMsaaDepth() const { +VulkanAttachment& VulkanRenderTarget::getMsaaDepth() { return mMsaaDepthAttachment; } diff --git a/filament/backend/src/vulkan/VulkanHandles.h b/filament/backend/src/vulkan/VulkanHandles.h index 8abdccfd968..bb20097a474 100644 --- a/filament/backend/src/vulkan/VulkanHandles.h +++ b/filament/backend/src/vulkan/VulkanHandles.h @@ -198,10 +198,6 @@ struct VulkanProgram : public HwProgram, VulkanResource { return mInfo->bindingToSamplerIndex; } - inline UsageFlags getUsage() const { - return mInfo->usage; - } - // Get a list of the sampler binding indices so that we don't have to loop through all possible // samplers. 
inline BindingList const& getBindings() const { return mInfo->bindings; } @@ -235,10 +231,6 @@ struct VulkanProgram : public HwProgram, VulkanResource { #endif {} - // This bitset maps to each of the sampler in the sampler groups associated with this - // program, and whether each sampler is used in which shader (i.e. vert, frag, compute). - UsageFlags usage; - BindingList bindings; // We store the samplerGroupIndex as the top 8-bit and the index within each group as the lower 8-bit. @@ -282,10 +274,11 @@ struct VulkanRenderTarget : private HwRenderTarget, VulkanResource { void transformClientRectToPlatform(VkRect2D* bounds) const; void transformClientRectToPlatform(VkViewport* bounds) const; VkExtent2D getExtent() const; - VulkanAttachment getColor(int target) const; - VulkanAttachment getMsaaColor(int target) const; - VulkanAttachment getDepth() const; - VulkanAttachment getMsaaDepth() const; + // We return references in the following methods to avoid a copy. + VulkanAttachment& getColor(int target); + VulkanAttachment& getMsaaColor(int target); + VulkanAttachment& getDepth(); + VulkanAttachment& getMsaaDepth(); uint8_t getColorTargetCount(const VulkanRenderPass& pass) const; uint8_t getSamples() const { return mSamples; } bool hasDepth() const { return mDepth.texture; } diff --git a/filament/backend/src/vulkan/VulkanImageUtility.cpp b/filament/backend/src/vulkan/VulkanImageUtility.cpp index 108a3f2b96a..6c038e429f0 100644 --- a/filament/backend/src/vulkan/VulkanImageUtility.cpp +++ b/filament/backend/src/vulkan/VulkanImageUtility.cpp @@ -26,38 +26,10 @@ using namespace bluevk; -namespace filament::backend { +namespace filament::backend::imgutil { namespace { -inline VkImageLayout getVkImageLayout(VulkanLayout layout) { - switch (layout) { - case VulkanLayout::UNDEFINED: - return VK_IMAGE_LAYOUT_UNDEFINED; - case VulkanLayout::READ_WRITE: - return VK_IMAGE_LAYOUT_GENERAL; - case VulkanLayout::READ_ONLY: - return VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - case 
VulkanLayout::TRANSFER_SRC: - return VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; - case VulkanLayout::TRANSFER_DST: - return VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - case VulkanLayout::DEPTH_ATTACHMENT: - return VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - case VulkanLayout::DEPTH_SAMPLER: - return VK_IMAGE_LAYOUT_GENERAL; - case VulkanLayout::PRESENT: - return VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; - // Filament sometimes samples from one miplevel while writing to another level in the same - // texture (e.g. bloom does this). Moreover we'd like to avoid lots of expensive layout - // transitions. So, keep it simple and use GENERAL for all color-attachable textures. - case VulkanLayout::COLOR_ATTACHMENT: - return VK_IMAGE_LAYOUT_GENERAL; - case VulkanLayout::COLOR_ATTACHMENT_RESOLVE: - return VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - } -} - inline std::tuple getVkTransition(const VulkanLayoutTransition& transition) { @@ -70,11 +42,11 @@ getVkTransition(const VulkanLayoutTransition& transition) { srcStage = VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT; break; case VulkanLayout::COLOR_ATTACHMENT: - srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT - | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT - | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - srcStage = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT - | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + srcStage = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; break; case VulkanLayout::READ_WRITE: srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; @@ -154,27 +126,12 @@ getVkTransition(const VulkanLayoutTransition& transition) { } return std::make_tuple(srcAccessMask, dstAccessMask, srcStage, dstStage, - getVkImageLayout(transition.oldLayout), 
getVkImageLayout(transition.newLayout)); + getVkLayout(transition.oldLayout), getVkLayout(transition.newLayout)); } }// anonymous namespace -VkImageViewType VulkanImageUtility::getViewType(SamplerType target) { - switch (target) { - case SamplerType::SAMPLER_CUBEMAP: - return VK_IMAGE_VIEW_TYPE_CUBE; - case SamplerType::SAMPLER_2D_ARRAY: - return VK_IMAGE_VIEW_TYPE_2D_ARRAY; - case SamplerType::SAMPLER_CUBEMAP_ARRAY: - return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; - case SamplerType::SAMPLER_3D: - return VK_IMAGE_VIEW_TYPE_3D; - default: - return VK_IMAGE_VIEW_TYPE_2D; - } -} - -void VulkanImageUtility::transitionLayout(VkCommandBuffer cmdbuffer, +void transitionLayout(VkCommandBuffer cmdbuffer, VulkanLayoutTransition transition) { if (transition.oldLayout == transition.newLayout) { return; @@ -197,10 +154,6 @@ void VulkanImageUtility::transitionLayout(VkCommandBuffer cmdbuffer, vkCmdPipelineBarrier(cmdbuffer, srcStage, dstStage, 0, 0, nullptr, 0, nullptr, 1, &barrier); } -VkImageLayout VulkanImageUtility::getVkLayout(VulkanLayout layout) { - return getVkImageLayout(layout); -} - }// namespace filament::backend bool operator<(const VkImageSubresourceRange& a, const VkImageSubresourceRange& b) { @@ -217,12 +170,12 @@ bool operator<(const VkImageSubresourceRange& a, const VkImageSubresourceRange& return false; } -#if FVK_ENABLED(FVK_DEBUG_LAYOUT_TRANSITION | FVK_DEBUG_TEXTURE) +#if FVK_ENABLED(FVK_DEBUG_LAYOUT_TRANSITION) || FVK_ENABLED(FVK_DEBUG_TEXTURE) #define CASE(VALUE) \ case filament::backend::VulkanLayout::VALUE: { \ out << #VALUE; \ out << " [" \ - << filament::backend::VulkanImageUtility::getVkLayout( \ + << filament::backend::imgutil::getVkLayout( \ filament::backend::VulkanLayout::VALUE) \ << "]"; \ break; \ diff --git a/filament/backend/src/vulkan/VulkanImageUtility.h b/filament/backend/src/vulkan/VulkanImageUtility.h index efb3506471b..92aaac96ea4 100644 --- a/filament/backend/src/vulkan/VulkanImageUtility.h +++ 
b/filament/backend/src/vulkan/VulkanImageUtility.h @@ -59,44 +59,85 @@ struct VulkanLayoutTransition { VkImageSubresourceRange subresources; }; -class VulkanImageUtility { -public: - static VkImageViewType getViewType(SamplerType target); - - inline static VulkanLayout getDefaultLayout(TextureUsage usage) { - if (any(usage & TextureUsage::DEPTH_ATTACHMENT)) { - if (any(usage & TextureUsage::SAMPLEABLE)) { - return VulkanLayout::DEPTH_SAMPLER; - } else { - return VulkanLayout::DEPTH_ATTACHMENT; - } - } - - if (any(usage & TextureUsage::COLOR_ATTACHMENT)) { - return VulkanLayout::COLOR_ATTACHMENT; +namespace imgutil { + +inline VkImageViewType getViewType(SamplerType target) { + switch (target) { + case SamplerType::SAMPLER_CUBEMAP: + return VK_IMAGE_VIEW_TYPE_CUBE; + case SamplerType::SAMPLER_2D_ARRAY: + return VK_IMAGE_VIEW_TYPE_2D_ARRAY; + case SamplerType::SAMPLER_CUBEMAP_ARRAY: + return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; + case SamplerType::SAMPLER_3D: + return VK_IMAGE_VIEW_TYPE_3D; + default: + return VK_IMAGE_VIEW_TYPE_2D; + } +} + +inline VulkanLayout getDefaultLayout(TextureUsage usage) { + if (any(usage & TextureUsage::DEPTH_ATTACHMENT)) { + if (any(usage & TextureUsage::SAMPLEABLE)) { + return VulkanLayout::DEPTH_SAMPLER; + } else { + return VulkanLayout::DEPTH_ATTACHMENT; } - // Finally, the layout for an immutable texture is optimal read-only. 
- return VulkanLayout::READ_ONLY; } - inline static VulkanLayout getDefaultLayout(VkImageUsageFlags vkusage) { - TextureUsage usage {}; - if (vkusage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { - usage = usage | TextureUsage::DEPTH_ATTACHMENT; - } - if (vkusage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { - usage = usage | TextureUsage::COLOR_ATTACHMENT; - } - if (vkusage & VK_IMAGE_USAGE_SAMPLED_BIT) { - usage = usage | TextureUsage::SAMPLEABLE; - } - return getDefaultLayout(usage); + if (any(usage & TextureUsage::COLOR_ATTACHMENT)) { + return VulkanLayout::COLOR_ATTACHMENT; } + // Finally, the layout for an immutable texture is optimal read-only. + return VulkanLayout::READ_ONLY; +} + +inline VulkanLayout getDefaultLayout(VkImageUsageFlags vkusage) { + TextureUsage usage{}; + if (vkusage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + usage = usage | TextureUsage::DEPTH_ATTACHMENT; + } + if (vkusage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + usage = usage | TextureUsage::COLOR_ATTACHMENT; + } + if (vkusage & VK_IMAGE_USAGE_SAMPLED_BIT) { + usage = usage | TextureUsage::SAMPLEABLE; + } + return getDefaultLayout(usage); +} + +constexpr inline VkImageLayout getVkLayout(VulkanLayout layout) { + switch (layout) { + case VulkanLayout::UNDEFINED: + return VK_IMAGE_LAYOUT_UNDEFINED; + case VulkanLayout::READ_WRITE: + return VK_IMAGE_LAYOUT_GENERAL; + case VulkanLayout::READ_ONLY: + return VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + case VulkanLayout::TRANSFER_SRC: + return VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + case VulkanLayout::TRANSFER_DST: + return VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + case VulkanLayout::DEPTH_ATTACHMENT: + return VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + case VulkanLayout::DEPTH_SAMPLER: + return VK_IMAGE_LAYOUT_GENERAL; + case VulkanLayout::PRESENT: + return VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + // Filament sometimes samples from one miplevel while writing to another level in the + // same texture (e.g. bloom does this). 
Moreover we'd like to avoid lots of expensive + // layout transitions. So, keep it simple and use GENERAL for all color-attachable + // textures. + case VulkanLayout::COLOR_ATTACHMENT: + return VK_IMAGE_LAYOUT_GENERAL; + case VulkanLayout::COLOR_ATTACHMENT_RESOLVE: + return VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + } +} - static VkImageLayout getVkLayout(VulkanLayout layout); - - static void transitionLayout(VkCommandBuffer cmdbuffer, VulkanLayoutTransition transition); -}; +void transitionLayout(VkCommandBuffer cmdbuffer, VulkanLayoutTransition transition); + +} // namespace imgutil } // namespace filament::backend diff --git a/filament/backend/src/vulkan/VulkanReadPixels.cpp b/filament/backend/src/vulkan/VulkanReadPixels.cpp index 83e341c2768..2fc7ee2e090 100644 --- a/filament/backend/src/vulkan/VulkanReadPixels.cpp +++ b/filament/backend/src/vulkan/VulkanReadPixels.cpp @@ -28,7 +28,6 @@ using namespace bluevk; namespace filament::backend { -using ImgUtil = VulkanImageUtility; using TaskHandler = VulkanReadPixels::TaskHandler; using WorkloadFunc = TaskHandler::WorkloadFunc; using OnCompleteFunc = TaskHandler::OnCompleteFunc; @@ -118,7 +117,7 @@ void VulkanReadPixels::terminate() noexcept { VulkanReadPixels::VulkanReadPixels(VkDevice device) : mDevice(device) {} -void VulkanReadPixels::run(VulkanRenderTarget const* srcTarget, uint32_t const x, uint32_t const y, +void VulkanReadPixels::run(VulkanRenderTarget* srcTarget, uint32_t const x, uint32_t const y, uint32_t const width, uint32_t const height, uint32_t const graphicsQueueFamilyIndex, PixelBufferDescriptor&& pbd, SelecteMemoryFunction const& selectMemoryFunc, OnReadCompleteFunction const& readCompleteFunc) { @@ -218,7 +217,7 @@ void VulkanReadPixels::run(VulkanRenderTarget const* srcTarget, uint32_t const x }; vkBeginCommandBuffer(cmdbuffer, &binfo); - ImgUtil::transitionLayout(cmdbuffer, { + imgutil::transitionLayout(cmdbuffer, { .image = stagingImage, .oldLayout = VulkanLayout::UNDEFINED, .newLayout = 
VulkanLayout::TRANSFER_DST, @@ -232,8 +231,7 @@ void VulkanReadPixels::run(VulkanRenderTarget const* srcTarget, uint32_t const x }); VulkanAttachment const srcAttachment = srcTarget->getColor(0); - const VkImageSubresourceRange srcRange - = srcAttachment.getSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT); + VkImageSubresourceRange const srcRange = srcAttachment.getSubresourceRange(); srcTexture->transitionLayout(cmdbuffer, srcRange, VulkanLayout::TRANSFER_SRC); VkImageCopy const imageCopyRegion = { @@ -266,8 +264,8 @@ void VulkanReadPixels::run(VulkanRenderTarget const* srcTarget, uint32_t const x imageCopyRegion.srcOffset.y + imageCopyRegion.extent.height <= srcExtent.height); vkCmdCopyImage(cmdbuffer, srcAttachment.getImage(), - ImgUtil::getVkLayout(VulkanLayout::TRANSFER_SRC), stagingImage, - ImgUtil::getVkLayout(VulkanLayout::TRANSFER_DST), 1, &imageCopyRegion); + imgutil::getVkLayout(VulkanLayout::TRANSFER_SRC), stagingImage, + imgutil::getVkLayout(VulkanLayout::TRANSFER_DST), 1, &imageCopyRegion); // Restore the source image layout. 
srcTexture->transitionLayout(cmdbuffer, srcRange, VulkanLayout::COLOR_ATTACHMENT); diff --git a/filament/backend/src/vulkan/VulkanReadPixels.h b/filament/backend/src/vulkan/VulkanReadPixels.h index 4c3e3297e94..e5dd2a31bb9 100644 --- a/filament/backend/src/vulkan/VulkanReadPixels.h +++ b/filament/backend/src/vulkan/VulkanReadPixels.h @@ -26,9 +26,7 @@ #include #include #include -#include #include -#include namespace filament::backend { @@ -74,8 +72,8 @@ class VulkanReadPixels { void terminate() noexcept; - void run(VulkanRenderTarget const* srcTarget, uint32_t x, uint32_t y, uint32_t width, - uint32_t height, uint32_t graphicsQueueFamilyIndex, PixelBufferDescriptor&& pbd, + void run(VulkanRenderTarget* srcTarget, uint32_t x, uint32_t y, uint32_t width, uint32_t height, + uint32_t graphicsQueueFamilyIndex, PixelBufferDescriptor&& pbd, SelecteMemoryFunction const& selectMemoryFunc, OnReadCompleteFunction const& readCompleteFunc); diff --git a/filament/backend/src/vulkan/VulkanStagePool.cpp b/filament/backend/src/vulkan/VulkanStagePool.cpp index 4c21104d003..05c843f0bae 100644 --- a/filament/backend/src/vulkan/VulkanStagePool.cpp +++ b/filament/backend/src/vulkan/VulkanStagePool.cpp @@ -17,6 +17,7 @@ #include "VulkanStagePool.h" #include "VulkanConstants.h" +#include "VulkanImageUtility.h" #include "VulkanMemory.h" #include "VulkanUtility.h" @@ -118,7 +119,7 @@ VulkanStageImage const* VulkanStagePool::acquireImage(PixelDataFormat format, Pi // VK_IMAGE_LAYOUT_PREINITIALIZED or VK_IMAGE_LAYOUT_GENERAL layout. Calling // vkGetImageSubresourceLayout for a linear image returns a subresource layout mapping that is // valid for either of those image layouts." 
- VulkanImageUtility::transitionLayout(cmdbuffer, { + imgutil::transitionLayout(cmdbuffer, { .image = image->image, .oldLayout = VulkanLayout::UNDEFINED, .newLayout = VulkanLayout::READ_WRITE, // (= VK_IMAGE_LAYOUT_GENERAL) diff --git a/filament/backend/src/vulkan/VulkanTexture.cpp b/filament/backend/src/vulkan/VulkanTexture.cpp index f611f40aac7..7819c00cedc 100644 --- a/filament/backend/src/vulkan/VulkanTexture.cpp +++ b/filament/backend/src/vulkan/VulkanTexture.cpp @@ -28,7 +28,6 @@ using namespace bluevk; namespace filament::backend { -using ImgUtil = VulkanImageUtility; VulkanTexture::VulkanTexture(VkDevice device, VmaAllocator allocator, VulkanCommands* commands, VkImage image, VkFormat format, uint8_t samples, uint32_t width, uint32_t height, TextureUsage tusage, VulkanStagePool& stagePool, bool heapAllocated) @@ -37,7 +36,7 @@ VulkanTexture::VulkanTexture(VkDevice device, VmaAllocator allocator, VulkanComm VulkanResource( heapAllocated ? VulkanResourceType::HEAP_ALLOCATED : VulkanResourceType::TEXTURE), mVkFormat(format), - mViewType(ImgUtil::getViewType(target)), + mViewType(imgutil::getViewType(target)), mSwizzle({}), mTextureImage(image), mFullViewRange{ @@ -62,7 +61,7 @@ VulkanTexture::VulkanTexture(VkDevice device, VkPhysicalDevice physicalDevice, VulkanResource( heapAllocated ? 
VulkanResourceType::HEAP_ALLOCATED : VulkanResourceType::TEXTURE), mVkFormat(backend::getVkFormat(tformat)), - mViewType(ImgUtil::getViewType(target)), + mViewType(imgutil::getViewType(target)), mSwizzle(swizzle), mStagePool(stagePool), mDevice(device), @@ -236,7 +235,7 @@ VulkanTexture::VulkanTexture(VkDevice device, VkPhysicalDevice physicalDevice, VulkanCommandBuffer& commands = mCommands->get(); VkCommandBuffer const cmdbuf = commands.buffer(); commands.acquire(this); - transitionLayout(cmdbuf, mFullViewRange, ImgUtil::getDefaultLayout(imageInfo.usage)); + transitionLayout(cmdbuf, mFullViewRange, imgutil::getDefaultLayout(imageInfo.usage)); } } @@ -327,10 +326,10 @@ void VulkanTexture::updateImage(const PixelBufferDescriptor& data, uint32_t widt VulkanLayout const newLayout = VulkanLayout::TRANSFER_DST; VulkanLayout nextLayout = getLayout(transitionRange.baseArrayLayer, miplevel); - VkImageLayout const newVkLayout = ImgUtil::getVkLayout(newLayout); + VkImageLayout const newVkLayout = imgutil::getVkLayout(newLayout); if (nextLayout == VulkanLayout::UNDEFINED) { - nextLayout = ImgUtil::getDefaultLayout(this->usage); + nextLayout = imgutil::getDefaultLayout(this->usage); } transitionLayout(cmdbuf, transitionRange, newLayout); @@ -374,8 +373,8 @@ void VulkanTexture::updateImageWithBlit(const PixelBufferDescriptor& hostData, u VulkanLayout const oldLayout = getLayout(layer, miplevel); transitionLayout(cmdbuf, range, newLayout); - vkCmdBlitImage(cmdbuf, stage->image, ImgUtil::getVkLayout(VulkanLayout::TRANSFER_SRC), - mTextureImage, ImgUtil::getVkLayout(newLayout), 1, blitRegions, VK_FILTER_NEAREST); + vkCmdBlitImage(cmdbuf, stage->image, imgutil::getVkLayout(VulkanLayout::TRANSFER_SRC), + mTextureImage, imgutil::getVkLayout(newLayout), 1, blitRegions, VK_FILTER_NEAREST); transitionLayout(cmdbuf, range, oldLayout); } @@ -467,7 +466,7 @@ void VulkanTexture::transitionLayout(VkCommandBuffer cmdbuf, const VkImageSubres for (uint32_t i = firstLayer; i < lastLayer; ++i) { 
for (uint32_t j = firstLevel; j < lastLevel; ++j) { VulkanLayout const layout = getLayout(i, j); - ImgUtil::transitionLayout(cmdbuf, { + imgutil::transitionLayout(cmdbuf, { .image = mTextureImage, .oldLayout = layout, .newLayout = newLayout, @@ -482,7 +481,7 @@ void VulkanTexture::transitionLayout(VkCommandBuffer cmdbuf, const VkImageSubres } } } else { - ImgUtil::transitionLayout(cmdbuf, { + imgutil::transitionLayout(cmdbuf, { .image = mTextureImage, .oldLayout = oldLayout, .newLayout = newLayout, diff --git a/filament/backend/src/vulkan/VulkanUtility.cpp b/filament/backend/src/vulkan/VulkanUtility.cpp index 4b60b92f4d8..612acb20e5d 100644 --- a/filament/backend/src/vulkan/VulkanUtility.cpp +++ b/filament/backend/src/vulkan/VulkanUtility.cpp @@ -655,6 +655,4 @@ uint8_t reduceSampleCount(uint8_t sampleCount, VkSampleCountFlags mask) { return mostSignificantBit((sampleCount - 1) & mask); } -_BitCountHelper _BitCountHelper::BitCounter = {}; - } // namespace filament::backend diff --git a/filament/backend/src/vulkan/VulkanUtility.h b/filament/backend/src/vulkan/VulkanUtility.h index a5e053633fa..4dd40359110 100644 --- a/filament/backend/src/vulkan/VulkanUtility.h +++ b/filament/backend/src/vulkan/VulkanUtility.h @@ -90,12 +90,321 @@ utils::FixedCapacityVector enumerate( #undef EXPAND_ENUM_NO_ARGS #undef EXPAND_ENUM_ARGS -// Used across pipeline related classes. 
-using UsageFlags = utils::bitset128; - // Useful shorthands using VkFormatList = utils::FixedCapacityVector; +// Copied from +// https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkFormat.html +constexpr VkFormat ALL_VK_FORMATS[] = { + VK_FORMAT_UNDEFINED, + VK_FORMAT_R4G4_UNORM_PACK8, + VK_FORMAT_R4G4B4A4_UNORM_PACK16, + VK_FORMAT_B4G4R4A4_UNORM_PACK16, + VK_FORMAT_R5G6B5_UNORM_PACK16, + VK_FORMAT_B5G6R5_UNORM_PACK16, + VK_FORMAT_R5G5B5A1_UNORM_PACK16, + VK_FORMAT_B5G5R5A1_UNORM_PACK16, + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + VK_FORMAT_R8_UNORM, + VK_FORMAT_R8_SNORM, + VK_FORMAT_R8_USCALED, + VK_FORMAT_R8_SSCALED, + VK_FORMAT_R8_UINT, + VK_FORMAT_R8_SINT, + VK_FORMAT_R8_SRGB, + VK_FORMAT_R8G8_UNORM, + VK_FORMAT_R8G8_SNORM, + VK_FORMAT_R8G8_USCALED, + VK_FORMAT_R8G8_SSCALED, + VK_FORMAT_R8G8_UINT, + VK_FORMAT_R8G8_SINT, + VK_FORMAT_R8G8_SRGB, + VK_FORMAT_R8G8B8_UNORM, + VK_FORMAT_R8G8B8_SNORM, + VK_FORMAT_R8G8B8_USCALED, + VK_FORMAT_R8G8B8_SSCALED, + VK_FORMAT_R8G8B8_UINT, + VK_FORMAT_R8G8B8_SINT, + VK_FORMAT_R8G8B8_SRGB, + VK_FORMAT_B8G8R8_UNORM, + VK_FORMAT_B8G8R8_SNORM, + VK_FORMAT_B8G8R8_USCALED, + VK_FORMAT_B8G8R8_SSCALED, + VK_FORMAT_B8G8R8_UINT, + VK_FORMAT_B8G8R8_SINT, + VK_FORMAT_B8G8R8_SRGB, + VK_FORMAT_R8G8B8A8_UNORM, + VK_FORMAT_R8G8B8A8_SNORM, + VK_FORMAT_R8G8B8A8_USCALED, + VK_FORMAT_R8G8B8A8_SSCALED, + VK_FORMAT_R8G8B8A8_UINT, + VK_FORMAT_R8G8B8A8_SINT, + VK_FORMAT_R8G8B8A8_SRGB, + VK_FORMAT_B8G8R8A8_UNORM, + VK_FORMAT_B8G8R8A8_SNORM, + VK_FORMAT_B8G8R8A8_USCALED, + VK_FORMAT_B8G8R8A8_SSCALED, + VK_FORMAT_B8G8R8A8_UINT, + VK_FORMAT_B8G8R8A8_SINT, + VK_FORMAT_B8G8R8A8_SRGB, + VK_FORMAT_A8B8G8R8_UNORM_PACK32, + VK_FORMAT_A8B8G8R8_SNORM_PACK32, + VK_FORMAT_A8B8G8R8_USCALED_PACK32, + VK_FORMAT_A8B8G8R8_SSCALED_PACK32, + VK_FORMAT_A8B8G8R8_UINT_PACK32, + VK_FORMAT_A8B8G8R8_SINT_PACK32, + VK_FORMAT_A8B8G8R8_SRGB_PACK32, + VK_FORMAT_A2R10G10B10_UNORM_PACK32, + VK_FORMAT_A2R10G10B10_SNORM_PACK32, + VK_FORMAT_A2R10G10B10_USCALED_PACK32, + 
VK_FORMAT_A2R10G10B10_SSCALED_PACK32, + VK_FORMAT_A2R10G10B10_UINT_PACK32, + VK_FORMAT_A2R10G10B10_SINT_PACK32, + VK_FORMAT_A2B10G10R10_UNORM_PACK32, + VK_FORMAT_A2B10G10R10_SNORM_PACK32, + VK_FORMAT_A2B10G10R10_USCALED_PACK32, + VK_FORMAT_A2B10G10R10_SSCALED_PACK32, + VK_FORMAT_A2B10G10R10_UINT_PACK32, + VK_FORMAT_A2B10G10R10_SINT_PACK32, + VK_FORMAT_R16_UNORM, + VK_FORMAT_R16_SNORM, + VK_FORMAT_R16_USCALED, + VK_FORMAT_R16_SSCALED, + VK_FORMAT_R16_UINT, + VK_FORMAT_R16_SINT, + VK_FORMAT_R16_SFLOAT, + VK_FORMAT_R16G16_UNORM, + VK_FORMAT_R16G16_SNORM, + VK_FORMAT_R16G16_USCALED, + VK_FORMAT_R16G16_SSCALED, + VK_FORMAT_R16G16_UINT, + VK_FORMAT_R16G16_SINT, + VK_FORMAT_R16G16_SFLOAT, + VK_FORMAT_R16G16B16_UNORM, + VK_FORMAT_R16G16B16_SNORM, + VK_FORMAT_R16G16B16_USCALED, + VK_FORMAT_R16G16B16_SSCALED, + VK_FORMAT_R16G16B16_UINT, + VK_FORMAT_R16G16B16_SINT, + VK_FORMAT_R16G16B16_SFLOAT, + VK_FORMAT_R16G16B16A16_UNORM, + VK_FORMAT_R16G16B16A16_SNORM, + VK_FORMAT_R16G16B16A16_USCALED, + VK_FORMAT_R16G16B16A16_SSCALED, + VK_FORMAT_R16G16B16A16_UINT, + VK_FORMAT_R16G16B16A16_SINT, + VK_FORMAT_R16G16B16A16_SFLOAT, + VK_FORMAT_R32_UINT, + VK_FORMAT_R32_SINT, + VK_FORMAT_R32_SFLOAT, + VK_FORMAT_R32G32_UINT, + VK_FORMAT_R32G32_SINT, + VK_FORMAT_R32G32_SFLOAT, + VK_FORMAT_R32G32B32_UINT, + VK_FORMAT_R32G32B32_SINT, + VK_FORMAT_R32G32B32_SFLOAT, + VK_FORMAT_R32G32B32A32_UINT, + VK_FORMAT_R32G32B32A32_SINT, + VK_FORMAT_R32G32B32A32_SFLOAT, + VK_FORMAT_R64_UINT, + VK_FORMAT_R64_SINT, + VK_FORMAT_R64_SFLOAT, + VK_FORMAT_R64G64_UINT, + VK_FORMAT_R64G64_SINT, + VK_FORMAT_R64G64_SFLOAT, + VK_FORMAT_R64G64B64_UINT, + VK_FORMAT_R64G64B64_SINT, + VK_FORMAT_R64G64B64_SFLOAT, + VK_FORMAT_R64G64B64A64_UINT, + VK_FORMAT_R64G64B64A64_SINT, + VK_FORMAT_R64G64B64A64_SFLOAT, + VK_FORMAT_B10G11R11_UFLOAT_PACK32, + VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, + VK_FORMAT_D16_UNORM, + VK_FORMAT_X8_D24_UNORM_PACK32, + VK_FORMAT_D32_SFLOAT, + VK_FORMAT_S8_UINT, + VK_FORMAT_D16_UNORM_S8_UINT, + 
VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_D32_SFLOAT_S8_UINT, + VK_FORMAT_BC1_RGB_UNORM_BLOCK, + VK_FORMAT_BC1_RGB_SRGB_BLOCK, + VK_FORMAT_BC1_RGBA_UNORM_BLOCK, + VK_FORMAT_BC1_RGBA_SRGB_BLOCK, + VK_FORMAT_BC2_UNORM_BLOCK, + VK_FORMAT_BC2_SRGB_BLOCK, + VK_FORMAT_BC3_UNORM_BLOCK, + VK_FORMAT_BC3_SRGB_BLOCK, + VK_FORMAT_BC4_UNORM_BLOCK, + VK_FORMAT_BC4_SNORM_BLOCK, + VK_FORMAT_BC5_UNORM_BLOCK, + VK_FORMAT_BC5_SNORM_BLOCK, + VK_FORMAT_BC6H_UFLOAT_BLOCK, + VK_FORMAT_BC6H_SFLOAT_BLOCK, + VK_FORMAT_BC7_UNORM_BLOCK, + VK_FORMAT_BC7_SRGB_BLOCK, + VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK, + VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK, + VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK, + VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK, + VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK, + VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK, + VK_FORMAT_EAC_R11_UNORM_BLOCK, + VK_FORMAT_EAC_R11_SNORM_BLOCK, + VK_FORMAT_EAC_R11G11_UNORM_BLOCK, + VK_FORMAT_EAC_R11G11_SNORM_BLOCK, + VK_FORMAT_ASTC_4x4_UNORM_BLOCK, + VK_FORMAT_ASTC_4x4_SRGB_BLOCK, + VK_FORMAT_ASTC_5x4_UNORM_BLOCK, + VK_FORMAT_ASTC_5x4_SRGB_BLOCK, + VK_FORMAT_ASTC_5x5_UNORM_BLOCK, + VK_FORMAT_ASTC_5x5_SRGB_BLOCK, + VK_FORMAT_ASTC_6x5_UNORM_BLOCK, + VK_FORMAT_ASTC_6x5_SRGB_BLOCK, + VK_FORMAT_ASTC_6x6_UNORM_BLOCK, + VK_FORMAT_ASTC_6x6_SRGB_BLOCK, + VK_FORMAT_ASTC_8x5_UNORM_BLOCK, + VK_FORMAT_ASTC_8x5_SRGB_BLOCK, + VK_FORMAT_ASTC_8x6_UNORM_BLOCK, + VK_FORMAT_ASTC_8x6_SRGB_BLOCK, + VK_FORMAT_ASTC_8x8_UNORM_BLOCK, + VK_FORMAT_ASTC_8x8_SRGB_BLOCK, + VK_FORMAT_ASTC_10x5_UNORM_BLOCK, + VK_FORMAT_ASTC_10x5_SRGB_BLOCK, + VK_FORMAT_ASTC_10x6_UNORM_BLOCK, + VK_FORMAT_ASTC_10x6_SRGB_BLOCK, + VK_FORMAT_ASTC_10x8_UNORM_BLOCK, + VK_FORMAT_ASTC_10x8_SRGB_BLOCK, + VK_FORMAT_ASTC_10x10_UNORM_BLOCK, + VK_FORMAT_ASTC_10x10_SRGB_BLOCK, + VK_FORMAT_ASTC_12x10_UNORM_BLOCK, + VK_FORMAT_ASTC_12x10_SRGB_BLOCK, + VK_FORMAT_ASTC_12x12_UNORM_BLOCK, + VK_FORMAT_ASTC_12x12_SRGB_BLOCK, + VK_FORMAT_G8B8G8R8_422_UNORM, + VK_FORMAT_B8G8R8G8_422_UNORM, + VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM, + 
VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, + VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM, + VK_FORMAT_G8_B8R8_2PLANE_422_UNORM, + VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM, + VK_FORMAT_R10X6_UNORM_PACK16, + VK_FORMAT_R10X6G10X6_UNORM_2PACK16, + VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16, + VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16, + VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16, + VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16, + VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, + VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16, + VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16, + VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16, + VK_FORMAT_R12X4_UNORM_PACK16, + VK_FORMAT_R12X4G12X4_UNORM_2PACK16, + VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16, + VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16, + VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16, + VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16, + VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16, + VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16, + VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16, + VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16, + VK_FORMAT_G16B16G16R16_422_UNORM, + VK_FORMAT_B16G16R16G16_422_UNORM, + VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, + VK_FORMAT_G16_B16R16_2PLANE_420_UNORM, + VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, + VK_FORMAT_G16_B16R16_2PLANE_422_UNORM, + VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, + VK_FORMAT_G8_B8R8_2PLANE_444_UNORM, + VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16, + VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16, + VK_FORMAT_G16_B16R16_2PLANE_444_UNORM, + VK_FORMAT_A4R4G4B4_UNORM_PACK16, + VK_FORMAT_A4B4G4R4_UNORM_PACK16, + VK_FORMAT_ASTC_4x4_SFLOAT_BLOCK, + VK_FORMAT_ASTC_5x4_SFLOAT_BLOCK, + VK_FORMAT_ASTC_5x5_SFLOAT_BLOCK, + VK_FORMAT_ASTC_6x5_SFLOAT_BLOCK, + VK_FORMAT_ASTC_6x6_SFLOAT_BLOCK, + VK_FORMAT_ASTC_8x5_SFLOAT_BLOCK, + VK_FORMAT_ASTC_8x6_SFLOAT_BLOCK, + VK_FORMAT_ASTC_8x8_SFLOAT_BLOCK, + 
VK_FORMAT_ASTC_10x5_SFLOAT_BLOCK, + VK_FORMAT_ASTC_10x6_SFLOAT_BLOCK, + VK_FORMAT_ASTC_10x8_SFLOAT_BLOCK, + VK_FORMAT_ASTC_10x10_SFLOAT_BLOCK, + VK_FORMAT_ASTC_12x10_SFLOAT_BLOCK, + VK_FORMAT_ASTC_12x12_SFLOAT_BLOCK, + VK_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG, + VK_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG, + VK_FORMAT_PVRTC2_2BPP_UNORM_BLOCK_IMG, + VK_FORMAT_PVRTC2_4BPP_UNORM_BLOCK_IMG, + VK_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG, + VK_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG, + VK_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG, + VK_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG, +// Not supported (yet) by bluevk +// VK_FORMAT_R16G16_SFIXED5_NV, +// VK_FORMAT_A1B5G5R5_UNORM_PACK16_KHR, +// VK_FORMAT_A8_UNORM_KHR, +// VK_FORMAT_A8_UNORM, + VK_FORMAT_ASTC_4x4_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_5x4_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_5x5_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_6x5_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_6x6_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_8x5_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_8x6_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_8x8_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_10x5_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_10x6_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_10x8_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_10x10_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_12x10_SFLOAT_BLOCK_EXT, + VK_FORMAT_ASTC_12x12_SFLOAT_BLOCK_EXT, + VK_FORMAT_G8B8G8R8_422_UNORM_KHR, + VK_FORMAT_B8G8R8G8_422_UNORM_KHR, + VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM_KHR, + VK_FORMAT_G8_B8R8_2PLANE_420_UNORM_KHR, + VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM_KHR, + VK_FORMAT_G8_B8R8_2PLANE_422_UNORM_KHR, + VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM_KHR, + VK_FORMAT_R10X6_UNORM_PACK16_KHR, + VK_FORMAT_R10X6G10X6_UNORM_2PACK16_KHR, + VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16_KHR, + VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16_KHR, + VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16_KHR, + VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16_KHR, + VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16_KHR, + VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16_KHR, + 
VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16_KHR, + VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16_KHR, + VK_FORMAT_R12X4_UNORM_PACK16_KHR, + VK_FORMAT_R12X4G12X4_UNORM_2PACK16_KHR, + VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16_KHR, + VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16_KHR, + VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16_KHR, + VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16_KHR, + VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16_KHR, + VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16_KHR, + VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16_KHR, + VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16_KHR, + VK_FORMAT_G16B16G16R16_422_UNORM_KHR, + VK_FORMAT_B16G16R16G16_422_UNORM_KHR, + VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM_KHR, + VK_FORMAT_G16_B16R16_2PLANE_420_UNORM_KHR, + VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM_KHR, + VK_FORMAT_G16_B16R16_2PLANE_422_UNORM_KHR, + VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM_KHR, + VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT, + VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT, + VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT, + VK_FORMAT_G16_B16R16_2PLANE_444_UNORM_EXT, + VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT, + VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT, + VK_FORMAT_R16G16_S10_5_NV, +}; + // An Array that will be fixed capacity, but the "size" (as in user added elements) is variable. // Note that this class is movable. template @@ -281,6 +590,7 @@ struct DescriptorSetLayout { } // namespace descset +namespace { // Use constexpr to statically generate a bit count table for 8-bit numbers. 
struct _BitCountHelper { constexpr _BitCountHelper() : data{} { @@ -303,14 +613,15 @@ struct _BitCountHelper { return count; } - static _BitCountHelper BitCounter; private: uint8_t data[256]; }; +} // namespace anonymous template inline uint8_t countBits(MaskType num) { - return _BitCountHelper::BitCounter.count(num); + static _BitCountHelper BitCounter = {}; + return BitCounter.count(num); } // This is useful for counting the total number of descriptors for both vertex and fragment stages. diff --git a/filament/backend/src/vulkan/caching/VulkanDescriptorSetManager.cpp b/filament/backend/src/vulkan/caching/VulkanDescriptorSetManager.cpp index cad4339a1b0..35228e95e6a 100644 --- a/filament/backend/src/vulkan/caching/VulkanDescriptorSetManager.cpp +++ b/filament/backend/src/vulkan/caching/VulkanDescriptorSetManager.cpp @@ -42,8 +42,6 @@ constexpr uint8_t MAX_INPUT_ATTACHMENT_BINDING = 1; constexpr uint8_t MAX_BINDINGS = MAX_SAMPLER_BINDING + MAX_UBO_BINDING + MAX_INPUT_ATTACHMENT_BINDING; -using ImgUtil = VulkanImageUtility; - using Bitmask = VulkanDescriptorSetLayout::Bitmask; using DescriptorCount = VulkanDescriptorSetLayout::Count; using UBOMap = std::array, MAX_UBO_BINDING>; @@ -1010,15 +1008,15 @@ class VulkanDescriptorSetManager::Impl { } else { info.imageView = texture->getViewForType(range, expectedType); } - info.imageLayout = ImgUtil::getVkLayout(texture->getPrimaryImageLayout()); + info.imageLayout = imgutil::getVkLayout(texture->getPrimaryImageLayout()); mSamplerMap[binding] = {info, texture}; mResources.acquire(texture); } void updateInputAttachment(Handle, VulkanAttachment attachment) noexcept { VkDescriptorImageInfo info = { - .imageView = attachment.getImageView(VK_IMAGE_ASPECT_COLOR_BIT), - .imageLayout = ImgUtil::getVkLayout(attachment.getLayout()), + .imageView = attachment.getImageView(), + .imageLayout = imgutil::getVkLayout(attachment.getLayout()), }; mInputAttachment = {attachment, info}; mResources.acquire(attachment.texture); @@ -1042,7 
+1040,7 @@ class VulkanDescriptorSetManager::Impl { mPlaceHolderImageInfo = { .sampler = sampler, .imageView = texture->getPrimaryImageView(), - .imageLayout = ImgUtil::getVkLayout(texture->getPrimaryImageLayout()), + .imageLayout = imgutil::getVkLayout(texture->getPrimaryImageLayout()), }; } diff --git a/filament/backend/src/vulkan/platform/VulkanPlatform.cpp b/filament/backend/src/vulkan/platform/VulkanPlatform.cpp index 1510669b285..687887a6988 100644 --- a/filament/backend/src/vulkan/platform/VulkanPlatform.cpp +++ b/filament/backend/src/vulkan/platform/VulkanPlatform.cpp @@ -137,18 +137,14 @@ void printDepthFormats(VkPhysicalDevice device) { const VkFormatFeatureFlags required = VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; utils::slog.i << "Sampleable depth formats: "; - for (VkFormat format = (VkFormat) 1;;) { + for (VkFormat format : ALL_VK_FORMATS) { VkFormatProperties props; vkGetPhysicalDeviceFormatProperties(device, format, &props); if ((props.optimalTilingFeatures & required) == required) { utils::slog.i << format << " "; } - if (format == VK_FORMAT_ASTC_12x12_SRGB_BLOCK) { - utils::slog.i << utils::io::endl; - break; - } - format = (VkFormat) (1 + (int) format); } + utils::slog.i << utils::io::endl; } #endif @@ -344,13 +340,13 @@ std::tuple pruneExtensions(VkPhysicalDevice device, ExtensionSet newInstExts = instExts; ExtensionSet newDeviceExts = deviceExts; -#if FVK_ENABLED(FVK_DEBUG_DEBUG_UTILS) +#if FVK_ENABLED(FVK_DEBUG_DEBUG_UTILS) // debugUtils and debugMarkers extensions are used mutually exclusively. if (newInstExts.find(VK_EXT_DEBUG_UTILS_EXTENSION_NAME) != newInstExts.end() && newDeviceExts.find(VK_EXT_DEBUG_MARKER_EXTENSION_NAME) != newDeviceExts.end()) { newDeviceExts.erase(VK_EXT_DEBUG_MARKER_EXTENSION_NAME); } -#endif +#endif #if FVK_ENABLED(FVK_DEBUG_VALIDATION) // debugMarker must also request debugReport the instance extension. So check if that's present. 
@@ -405,7 +401,7 @@ inline int deviceTypeOrder(VkPhysicalDeviceType deviceType) { case VK_PHYSICAL_DEVICE_TYPE_OTHER: return 1; default: - utils::slog.w << "devcieTypeOrder: Unexpected deviceType: " << deviceType + utils::slog.w << "deviceTypeOrder: Unexpected deviceType: " << deviceType << utils::io::endl; return -1; } @@ -500,7 +496,7 @@ VkPhysicalDevice selectPhysicalDevice(VkInstance instance, return device; } -VkFormatList findAttachmentDepthFormats(VkPhysicalDevice device) { +VkFormatList findAttachmentDepthStencilFormats(VkPhysicalDevice device) { VkFormatFeatureFlags const features = VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; // The ordering here indicates the preference of choosing depth+stencil format. @@ -524,6 +520,24 @@ VkFormatList findAttachmentDepthFormats(VkPhysicalDevice device) { return ret; } +VkFormatList findBlittableDepthStencilFormats(VkPhysicalDevice device) { + std::vector selectedFormats; + VkFormatFeatureFlags const required = VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; + for (VkFormat format : ALL_VK_FORMATS) { + if (isVkDepthFormat(format)) { + VkFormatProperties props; + vkGetPhysicalDeviceFormatProperties(device, format, &props); + if ((props.optimalTilingFeatures & required) == required) { + selectedFormats.push_back(format); + } + } + } + VkFormatList ret(selectedFormats.size()); + std::copy(selectedFormats.begin(), selectedFormats.end(), ret.begin()); + return ret; +} + }// anonymous namespace using SwapChainPtr = VulkanPlatform::SwapChainPtr; @@ -669,9 +683,11 @@ Driver* VulkanPlatform::createDriver(void* sharedContext, "Debug utils should not be enabled in release build."); #endif - context.mDepthFormats = findAttachmentDepthFormats(mImpl->mPhysicalDevice); + context.mDepthStencilFormats = findAttachmentDepthStencilFormats(mImpl->mPhysicalDevice); + context.mBlittableDepthStencilFormats = + findBlittableDepthStencilFormats(mImpl->mPhysicalDevice); - 
assert_invariant(context.mDepthFormats.size() > 0); + assert_invariant(context.mDepthStencilFormats.size() > 0); #if FVK_ENABLED(FVK_DEBUG_VALIDATION) printDepthFormats(mImpl->mPhysicalDevice); diff --git a/filament/backend/src/vulkan/platform/VulkanPlatformSwapChainImpl.cpp b/filament/backend/src/vulkan/platform/VulkanPlatformSwapChainImpl.cpp index a5a31a8d95f..5627e2610c2 100644 --- a/filament/backend/src/vulkan/platform/VulkanPlatformSwapChainImpl.cpp +++ b/filament/backend/src/vulkan/platform/VulkanPlatformSwapChainImpl.cpp @@ -243,7 +243,7 @@ VkResult VulkanPlatformSurfaceSwapChain::create() { mSwapChainBundle.colors = enumerate(vkGetSwapchainImagesKHR, mDevice, mSwapchain); mSwapChainBundle.colorFormat = surfaceFormat.format; mSwapChainBundle.depthFormat = - selectDepthFormat(mContext.getAttachmentDepthFormats(), mHasStencil); + selectDepthFormat(mContext.getAttachmentDepthStencilFormats(), mHasStencil); mSwapChainBundle.depth = createImage(mSwapChainBundle.extent, mSwapChainBundle.depthFormat); slog.i << "vkCreateSwapchain" @@ -330,7 +330,7 @@ VulkanPlatformHeadlessSwapChain::VulkanPlatformHeadlessSwapChain(VulkanContext c bool const hasStencil = (flags & backend::SWAP_CHAIN_HAS_STENCIL_BUFFER) != 0; mSwapChainBundle.depthFormat = - selectDepthFormat(mContext.getAttachmentDepthFormats(), hasStencil); + selectDepthFormat(mContext.getAttachmentDepthStencilFormats(), hasStencil); mSwapChainBundle.depth = createImage(extent, mSwapChainBundle.depthFormat); } diff --git a/filament/backend/test/test_Blit.cpp b/filament/backend/test/test_Blit.cpp index 04809884453..61c8b89e5e3 100644 --- a/filament/backend/test/test_Blit.cpp +++ b/filament/backend/test/test_Blit.cpp @@ -238,13 +238,13 @@ TEST_F(BackendTest, ColorMagnify) { {0, 0, kSrcTexWidth >> srcLevel, kSrcTexHeight >> srcLevel}, SamplerMagFilter::LINEAR); // Push through an empty frame to allow the texture to upload and the blit to execute. 
- api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); api.commit(swapChain); api.endFrame(0); // Grab a screenshot. ScreenshotParams params { kDstTexWidth, kDstTexHeight, "ColorMagnify.png" }; - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); dumpScreenshot(api, dstRenderTargets[0], ¶ms); api.commit(swapChain); api.endFrame(0); @@ -402,7 +402,7 @@ TEST_F(BackendTest, ColorResolve) { }); // FIXME: on Metal this triangle is not drawn. Can't understand why. - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); api.beginRenderPass(srcRenderTarget, params); api.bindUniformBuffer(0, ubuffer); api.draw(state, triangle.getRenderPrimitive(), 0, 3, 1); @@ -484,13 +484,13 @@ TEST_F(BackendTest, Blit2DTextureArray) { {0, 0, kSrcTexWidth >> srcLevel, kSrcTexHeight >> srcLevel}, SamplerMagFilter::LINEAR); // Push through an empty frame to allow the texture to upload and the blit to execute. - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); api.commit(swapChain); api.endFrame(0); // Grab a screenshot. ScreenshotParams params { kDstTexWidth, kDstTexHeight, "Blit2DTextureArray.png" }; - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); dumpScreenshot(api, dstRenderTarget, ¶ms); api.commit(swapChain); api.endFrame(0); @@ -574,13 +574,13 @@ TEST_F(BackendTest, BlitRegion) { SamplerMagFilter::LINEAR); // Push through an empty frame to allow the texture to upload and the blit to execute. - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); api.commit(swapChain); api.endFrame(0); // Grab a screenshot. 
ScreenshotParams params { kDstTexWidth, kDstTexHeight, "BlitRegion.png" }; - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); dumpScreenshot(api, dstRenderTarget, ¶ms); api.commit(swapChain); api.endFrame(0); @@ -655,7 +655,7 @@ TEST_F(BackendTest, BlitRegionToSwapChain) { .height = kDstTexHeight - 10, }; - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); api.blitDEPRECATED(TargetBufferFlags::COLOR0, dstRenderTarget, dstRect, srcRenderTargets[srcLevel], diff --git a/filament/backend/test/test_BufferUpdates.cpp b/filament/backend/test/test_BufferUpdates.cpp index 3207fdae9d9..bde771448a1 100644 --- a/filament/backend/test/test_BufferUpdates.cpp +++ b/filament/backend/test/test_BufferUpdates.cpp @@ -143,7 +143,7 @@ TEST_F(BackendTest, VertexBufferUpdate) { } getDriverApi().makeCurrent(swapChain, swapChain); - getDriverApi().beginFrame(0, 0); + getDriverApi().beginFrame(0, 0, 0); // Draw 10 triangles, updating the vertex buffer / index buffer each time. size_t triangleIndex = 0; diff --git a/filament/backend/test/test_FeedbackLoops.cpp b/filament/backend/test/test_FeedbackLoops.cpp index 04f72578013..232127ede57 100644 --- a/filament/backend/test/test_FeedbackLoops.cpp +++ b/filament/backend/test/test_FeedbackLoops.cpp @@ -200,7 +200,7 @@ TEST_F(BackendTest, FeedbackLoops) { auto ubuffer = api.createBufferObject(sizeof(MaterialParams), BufferObjectBinding::UNIFORM, BufferUsage::STATIC); api.makeCurrent(swapChain, swapChain); - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); api.bindSamplers(0, sgroup); api.bindUniformBuffer(0, ubuffer); diff --git a/filament/backend/test/test_LoadImage.cpp b/filament/backend/test/test_LoadImage.cpp index 8e1d8f3b562..2af57eb7196 100644 --- a/filament/backend/test/test_LoadImage.cpp +++ b/filament/backend/test/test_LoadImage.cpp @@ -414,7 +414,7 @@ TEST_F(BackendTest, UpdateImageSRGB) { api.update3DImage(texture, 0, 0, 0, 0, 512, 512, 1, std::move(descriptor)); - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); // Update samplers. 
SamplerGroup samplers(1); @@ -486,7 +486,7 @@ TEST_F(BackendTest, UpdateImageMipLevel) { PixelBufferDescriptor descriptor = checkerboardPixelBuffer(pixelFormat, pixelType, 512); api.update3DImage(texture, /* level*/ 1, 0, 0, 0, 512, 512, 1, std::move(descriptor)); - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); // Update samplers. SamplerGroup samplers(1); @@ -570,7 +570,7 @@ TEST_F(BackendTest, UpdateImage3D) { api.update3DImage(texture, 0, 0, 0, 0, 512, 512, 4, std::move(descriptor)); - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); // Update samplers. SamplerGroup samplers(1); diff --git a/filament/backend/test/test_MRT.cpp b/filament/backend/test/test_MRT.cpp index bd29d27e733..21a30f7104a 100644 --- a/filament/backend/test/test_MRT.cpp +++ b/filament/backend/test/test_MRT.cpp @@ -127,7 +127,7 @@ TEST_F(BackendTest, MRT) { getDriverApi().startCapture(0); getDriverApi().makeCurrent(swapChain, swapChain); - getDriverApi().beginFrame(0, 0); + getDriverApi().beginFrame(0, 0, 0); // Draw a triangle. 
getDriverApi().beginRenderPass(renderTarget, params); diff --git a/filament/backend/test/test_MipLevels.cpp b/filament/backend/test/test_MipLevels.cpp index 76497e1b2a4..3c54839dad1 100644 --- a/filament/backend/test/test_MipLevels.cpp +++ b/filament/backend/test/test_MipLevels.cpp @@ -143,7 +143,7 @@ TEST_F(BackendTest, SetMinMaxLevel) { TrianglePrimitive triangle(api); - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); // We set the base mip to 1, and the max mip to 3 // Level 0: 128x128 (red) diff --git a/filament/backend/test/test_MissingRequiredAttributes.cpp b/filament/backend/test/test_MissingRequiredAttributes.cpp index c56cece5993..453b1bbcb4b 100644 --- a/filament/backend/test/test_MissingRequiredAttributes.cpp +++ b/filament/backend/test/test_MissingRequiredAttributes.cpp @@ -100,7 +100,7 @@ TEST_F(BackendTest, MissingRequiredAttributes) { getDriverApi().startCapture(0); getDriverApi().makeCurrent(swapChain, swapChain); - getDriverApi().beginFrame(0, 0); + getDriverApi().beginFrame(0, 0, 0); // Render a triangle. getDriverApi().beginRenderPass(defaultRenderTarget, params); diff --git a/filament/backend/test/test_ReadPixels.cpp b/filament/backend/test/test_ReadPixels.cpp index 98d8c41705d..ee1a37cd9c2 100644 --- a/filament/backend/test/test_ReadPixels.cpp +++ b/filament/backend/test/test_ReadPixels.cpp @@ -295,7 +295,7 @@ TEST_F(ReadPixelsTest, ReadPixels) { params.viewport.width = t.getRenderTargetSize(); getDriverApi().makeCurrent(swapChain, swapChain); - getDriverApi().beginFrame(0, 0); + getDriverApi().beginFrame(0, 0, 0); // Render a white triangle over blue. getDriverApi().beginRenderPass(renderTarget, params); @@ -434,7 +434,7 @@ TEST_F(ReadPixelsTest, ReadPixelsPerformance) { } getDriverApi().makeCurrent(swapChain, swapChain); - getDriverApi().beginFrame(0, 0); + getDriverApi().beginFrame(0, 0, 0); // Render some content, just so we don't read back uninitialized data. 
getDriverApi().beginRenderPass(renderTarget, params); diff --git a/filament/backend/test/test_RenderExternalImage.cpp b/filament/backend/test/test_RenderExternalImage.cpp index 9e8d1d5fa70..b8261434b23 100644 --- a/filament/backend/test/test_RenderExternalImage.cpp +++ b/filament/backend/test/test_RenderExternalImage.cpp @@ -109,7 +109,7 @@ TEST_F(BackendTest, RenderExternalImageWithoutSet) { getDriverApi().startCapture(0); getDriverApi().makeCurrent(swapChain, swapChain); - getDriverApi().beginFrame(0, 0); + getDriverApi().beginFrame(0, 0, 0); SamplerGroup samplers(1); samplers.setSampler(0, { texture, {} }); @@ -231,7 +231,7 @@ TEST_F(BackendTest, RenderExternalImage) { getDriverApi().startCapture(0); getDriverApi().makeCurrent(swapChain, swapChain); - getDriverApi().beginFrame(0, 0); + getDriverApi().beginFrame(0, 0, 0); SamplerGroup samplers(1); samplers.setSampler(0, { texture, {} }); diff --git a/filament/backend/test/test_Scissor.cpp b/filament/backend/test/test_Scissor.cpp index 985ef9414b8..42f2fe750b0 100644 --- a/filament/backend/test/test_Scissor.cpp +++ b/filament/backend/test/test_Scissor.cpp @@ -136,7 +136,7 @@ TEST_F(BackendTest, ScissorViewportRegion) { ps.rasterState.depthWrite = false; api.makeCurrent(swapChain, swapChain); - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); api.beginRenderPass(srcRenderTarget, params); api.scissor(scissor); @@ -227,7 +227,7 @@ TEST_F(BackendTest, ScissorViewportEdgeCases) { ps.rasterState.depthWrite = false; api.makeCurrent(swapChain, swapChain); - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); api.beginRenderPass(renderTarget, params); api.scissor(scissor); diff --git a/filament/backend/test/test_StencilBuffer.cpp b/filament/backend/test/test_StencilBuffer.cpp index aa02830b547..4e9d2183294 100644 --- a/filament/backend/test/test_StencilBuffer.cpp +++ b/filament/backend/test/test_StencilBuffer.cpp @@ -110,7 +110,7 @@ class BasicStencilBufferTest : public BackendTest { 
ps.stencilState.front.stencilOpDepthStencilPass = StencilOperation::INCR; api.makeCurrent(swapChain, swapChain); - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); api.beginRenderPass(renderTarget, params); api.draw(ps, smallTriangle.getRenderPrimitive(), 0, 3, 1); @@ -237,7 +237,7 @@ TEST_F(BasicStencilBufferTest, StencilBufferMSAA) { ps.stencilState.front.stencilOpDepthStencilPass = StencilOperation::INCR; api.makeCurrent(swapChain, swapChain); - api.beginFrame(0, 0); + api.beginFrame(0, 0, 0); api.beginRenderPass(renderTarget0, params); api.draw(ps, smallTriangle.getRenderPrimitive(), 0, 3, 1); diff --git a/filament/include/filament/Engine.h b/filament/include/filament/Engine.h index 033cb8066fd..a737469b768 100644 --- a/filament/include/filament/Engine.h +++ b/filament/include/filament/Engine.h @@ -340,6 +340,28 @@ class UTILS_PUBLIC Engine { * Disable backend handles use-after-free checks. */ bool disableHandleUseAfterFreeCheck = false; + + /* + * Sets a preferred shader language for Filament to use. + * + * The Metal backend supports two shader languages: MSL (Metal Shading Language) and + * METAL_LIBRARY (precompiled .metallib). This option controls which shader language is + * used when materials contain both. + * + * By default, when preferredShaderLanguage is unset, Filament will prefer METAL_LIBRARY + * shaders if present within a material, falling back to MSL. Setting + * preferredShaderLanguage to ShaderLanguage::MSL will instead instruct Filament to check + * for the presence of MSL in a material first, falling back to METAL_LIBRARY if MSL is not + * present. + * + * When using a non-Metal backend, setting this has no effect. + */ + enum class ShaderLanguage { + DEFAULT = 0, + MSL = 1, + METAL_LIBRARY = 2, + }; + ShaderLanguage preferredShaderLanguage = ShaderLanguage::DEFAULT; }; @@ -867,6 +889,15 @@ class UTILS_PUBLIC Engine { */ void flush(); + /** + * Get paused state of rendering thread. + * + *

Warning: This is an experimental API. + * + * @see setPaused + */ + bool isPaused() const noexcept; + /** * Pause or resume rendering thread. * diff --git a/filament/include/filament/SwapChain.h b/filament/include/filament/SwapChain.h index 0af01afc966..585e016eec0 100644 --- a/filament/include/filament/SwapChain.h +++ b/filament/include/filament/SwapChain.h @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -264,13 +265,22 @@ class UTILS_PUBLIC SwapChain : public FilamentAPI { * backend. * * A FrameScheduledCallback can be set on an individual SwapChain through - * SwapChain::setFrameScheduledCallback. If the callback is set, then the SwapChain will *not* - * automatically schedule itself for presentation. Instead, the application must call the - * PresentCallable passed to the FrameScheduledCallback. + * SwapChain::setFrameScheduledCallback. If the callback is set for a given frame, then the + * SwapChain will *not* automatically schedule itself for presentation. Instead, the application + * must call the PresentCallable passed to the FrameScheduledCallback. * - * There may be only one FrameScheduledCallback set per SwapChain. A call to - * SwapChain::setFrameScheduledCallback will overwrite any previous FrameScheduledCallbacks set - * on the same SwapChain. + * Each SwapChain can have only one FrameScheduledCallback set per frame. If + * setFrameScheduledCallback is called multiple times on the same SwapChain before + * Renderer::endFrame(), the most recent call effectively overwrites any previously set + * callback. This allows the callback to be updated as needed before the frame has finished + * encoding. + * + * The "last" callback set by setFrameScheduledCallback gets "latched" when Renderer::endFrame() + * is executed. At this point, the state of the callback is fixed and is the one used for the + * frame that was just encoded. Subsequent changes to the callback using + * setFrameScheduledCallback after endFrame() apply to the next frame. 
+ * + * Use \c setFrameScheduledCallback() (with default arguments) to unset the callback. * * If your application delays the call to the PresentCallable by, for example, calling it on a * separate thread, you must ensure all PresentCallables have been called before shutting down @@ -278,28 +288,26 @@ class UTILS_PUBLIC SwapChain : public FilamentAPI { * Engine::shutdown. This is necessary to ensure the Filament Engine has had a chance to clean * up all memory related to frame presentation. * - * @param callback A callback, or nullptr to unset. - * @param user An optional pointer to user data passed to the callback function. + * @param handler Handler to dispatch the callback or nullptr for the default handler. + * @param callback Callback called when the frame is scheduled. * * @remark Only Filament's Metal backend supports PresentCallables and frame callbacks. Other * backends ignore the callback (which will never be called) and proceed normally. * - * @remark The SwapChain::FrameScheduledCallback is called on an arbitrary thread. - * + * @see CallbackHandler * @see PresentCallable */ - void setFrameScheduledCallback(FrameScheduledCallback UTILS_NULLABLE callback, - void* UTILS_NULLABLE user = nullptr); + void setFrameScheduledCallback(backend::CallbackHandler* UTILS_NULLABLE handler = nullptr, + FrameScheduledCallback&& callback = {}); /** - * Returns the SwapChain::FrameScheduledCallback that was previously set with - * SwapChain::setFrameScheduledCallback, or nullptr if one is not set. + * Returns whether or not this SwapChain currently has a FrameScheduledCallback set. 
* - * @return the previously-set FrameScheduledCallback, or nullptr + * @return true, if the last call to setFrameScheduledCallback set a callback * * @see SwapChain::setFrameCompletedCallback */ - UTILS_NULLABLE FrameScheduledCallback getFrameScheduledCallback() const noexcept; + bool isFrameScheduledCallbackSet() const noexcept; /** * FrameCompletedCallback is a callback function that notifies an application when a frame's diff --git a/filament/src/Engine.cpp b/filament/src/Engine.cpp index d0042f63320..ca370bc85cc 100644 --- a/filament/src/Engine.cpp +++ b/filament/src/Engine.cpp @@ -308,6 +308,11 @@ utils::JobSystem& Engine::getJobSystem() noexcept { return downcast(this)->getJobSystem(); } +bool Engine::isPaused() const noexcept { + ASSERT_PRECONDITION(UTILS_HAS_THREADING, "Pause is meant for multi-threaded platforms."); + return downcast(this)->isPaused(); +} + void Engine::setPaused(bool paused) { ASSERT_PRECONDITION(UTILS_HAS_THREADING, "Pause is meant for multi-threaded platforms."); downcast(this)->setPaused(paused); diff --git a/filament/src/PerShadowMapUniforms.cpp b/filament/src/PerShadowMapUniforms.cpp index 3515ef2b436..458b15db836 100644 --- a/filament/src/PerShadowMapUniforms.cpp +++ b/filament/src/PerShadowMapUniforms.cpp @@ -16,12 +16,19 @@ #include "PerShadowMapUniforms.h" -#include "ShadowMapManager.h" - #include "details/Camera.h" #include "details/Engine.h" #include +#include + +#include + +#include + +#include + +#include namespace filament { @@ -88,8 +95,8 @@ void PerShadowMapUniforms::prepareViewport(Transaction const& transaction, void PerShadowMapUniforms::prepareTime(Transaction const& transaction, FEngine& engine, math::float4 const& userTime) noexcept { auto& s = edit(transaction); - const uint64_t oneSecondRemainder = engine.getEngineTime().count() % 1000000000; - const float fraction = float(double(oneSecondRemainder) / 1000000000.0); + const uint64_t oneSecondRemainder = engine.getEngineTime().count() % 1'000'000'000; + const float 
fraction = float(double(oneSecondRemainder) / 1'000'000'000.0); s.time = fraction; s.userTime = userTime; } @@ -102,7 +109,6 @@ void PerShadowMapUniforms::prepareShadowMapping(Transaction const& transaction, s.vsmExponent = highPrecision ? high : low; } - PerShadowMapUniforms::Transaction PerShadowMapUniforms::open(backend::DriverApi& driver) noexcept { Transaction transaction; // TODO: use out-of-line buffer if too large @@ -114,7 +120,7 @@ PerShadowMapUniforms::Transaction PerShadowMapUniforms::open(backend::DriverApi& void PerShadowMapUniforms::commit(Transaction& transaction, backend::DriverApi& driver) noexcept { driver.updateBufferObject(mUniformBufferHandle, { - transaction.uniforms, sizeof(PerViewUib) }, 0); + transaction.uniforms, sizeof(PerViewUib) }, 0); transaction.uniforms = nullptr; } diff --git a/filament/src/PerShadowMapUniforms.h b/filament/src/PerShadowMapUniforms.h index 42ca2eb1593..05fcdd82e5f 100644 --- a/filament/src/PerShadowMapUniforms.h +++ b/filament/src/PerShadowMapUniforms.h @@ -17,18 +17,13 @@ #ifndef TNT_FILAMENT_PERSHADOWMAPUNIFORMS_H #define TNT_FILAMENT_PERSHADOWMAPUNIFORMS_H -#include - #include -#include - -#include "TypedUniformBuffer.h" +#include +#include #include -#include - -#include +#include namespace filament { @@ -40,13 +35,11 @@ class LightManager; /* * PerShadowMapUniforms manages the UBO needed to generate our shadow maps. Internally it just * holds onto a `PerViewUniform` UBO handle, but doesn't keep any shadow copy of it, instead it - * writes the data directly into the commandstream, for this reason partial update of the data + * writes the data directly into the CommandStream, for this reason partial update of the data * is not possible. 
*/ class PerShadowMapUniforms { - using LightManagerInstance = utils::EntityInstance; - public: class Transaction { friend PerShadowMapUniforms; diff --git a/filament/src/PostProcessManager.cpp b/filament/src/PostProcessManager.cpp index e570a9f632f..59520e0c143 100644 --- a/filament/src/PostProcessManager.cpp +++ b/filament/src/PostProcessManager.cpp @@ -265,6 +265,7 @@ static const PostProcessManager::MaterialInfo sMaterialList[] = { { "bilateralBlur", MATERIAL(BILATERALBLUR) }, { "bilateralBlurBentNormals", MATERIAL(BILATERALBLURBENTNORMALS) }, { "blitArray", MATERIAL(BLITARRAY) }, + { "blitDepth", MATERIAL(BLITDEPTH) }, { "bloomDownsample", MATERIAL(BLOOMDOWNSAMPLE) }, { "bloomDownsample2x", MATERIAL(BLOOMDOWNSAMPLE2X) }, { "bloomDownsample9", MATERIAL(BLOOMDOWNSAMPLE9) }, @@ -720,46 +721,16 @@ FrameGraphId PostProcessManager::screenSpaceAmbientOcclusion( * attachment, even if writes are not enabled. This restriction is lifted on desktop GL and * Vulkan. The Metal situation is unclear. * In this case, we need to duplicate the depth texture to use it as an attachment. - * The pass below that does this is automatically culled if not needed, which is decided by - * each backend. + * + * This is also needed in Vulkan for a similar reason. */ + FrameGraphId duplicateDepthOutput = {}; + if (!mWorkaroundAllowReadOnlyAncillaryFeedbackLoop) { + duplicateDepthOutput = blitDepth(fg, depth); + } - struct DuplicateDepthPassData { - FrameGraphId input; - FrameGraphId output; - }; - - // Needed for Vulkan and GLES. Some GLES implementations don't need it. Never needed for Metal. 
- auto& duplicateDepthPass = fg.addPass("Duplicate Depth Pass", - [&](FrameGraph::Builder& builder, auto& data) { - data.input = builder.read(depth, - FrameGraphTexture::Usage::BLIT_SRC); - - auto desc = builder.getDescriptor(data.input); - desc.levels = 1; // only copy the base level - - // create a new buffer for the copy - data.output = builder.createTexture("Depth Texture Copy", desc); - - // output is an attachment - data.output = builder.write(data.output, - FrameGraphTexture::Usage::BLIT_DST); - }, - [=](FrameGraphResources const& resources, auto const& data, DriverApi& driver) { - auto const& src = resources.getTexture(data.input); - auto const& dst = resources.getTexture(data.output); - auto const& srcSubDesc = resources.getSubResourceDescriptor(data.input); - auto const& dstSubDesc = resources.getSubResourceDescriptor(data.output); - auto const& desc = resources.getDescriptor(data.output); - assert_invariant(desc.samples == resources.getDescriptor(data.input).samples); - // here we can guarantee that src and dst format and size match, by construction. - driver.blit( - dst, dstSubDesc.level, dstSubDesc.layer, { 0, 0 }, - src, srcSubDesc.level, srcSubDesc.layer, { 0, 0 }, - { desc.width, desc.height }); - }); - - auto& SSAOPass = fg.addPass("SSAO Pass", + auto& SSAOPass = fg.addPass( + "SSAO Pass", [&](FrameGraph::Builder& builder, auto& data) { auto const& desc = builder.getDescriptor(depth); @@ -788,10 +759,7 @@ FrameGraphId PostProcessManager::screenSpaceAmbientOcclusion( // reading into it even though they were not written in the depth buffer. // The bilateral filter in the blur pass will ignore pixels at infinity. - auto depthAttachment = data.depth; - if (!mWorkaroundAllowReadOnlyAncillaryFeedbackLoop) { - depthAttachment = duplicateDepthPass->output; - } + auto depthAttachment = duplicateDepthOutput ? 
duplicateDepthOutput : data.depth; depthAttachment = builder.read(depthAttachment, FrameGraphTexture::Usage::DEPTH_ATTACHMENT); @@ -801,8 +769,7 @@ FrameGraphId PostProcessManager::screenSpaceAmbientOcclusion( .clearFlags = TargetBufferFlags::COLOR0 | TargetBufferFlags::COLOR1 }); }, - [=](FrameGraphResources const& resources, - auto const& data, DriverApi& driver) { + [=](FrameGraphResources const& resources, auto const& data, DriverApi& driver) { auto depth = resources.getTexture(data.depth); auto ssao = resources.getRenderPassInfo(); auto const& desc = resources.getDescriptor(data.depth); @@ -2143,6 +2110,7 @@ PostProcessManager::BloomPassOutput PostProcessManager::bloom(FrameGraph& fg, mi->setParameter("source", hwOut, { .filterMag = SamplerMagFilter::LINEAR, .filterMin = SamplerMinFilter::LINEAR_MIPMAP_NEAREST}); + mi->commit(driver); mi->use(driver); auto pipeline = material.getPipelineState(mEngine); @@ -3043,7 +3011,7 @@ FrameGraphId PostProcessManager::blit(FrameGraph& fg, bool tr auto& ppQuadBlit = fg.addPass("blitting", [&](FrameGraph::Builder& builder, auto& data) { data.input = builder.sample(input); - data.output = builder.createTexture("upscaled output", outDesc); + data.output = builder.createTexture("blit output", outDesc); data.output = builder.write(data.output, FrameGraphTexture::Usage::COLOR_ATTACHMENT); builder.declareRenderPass(builder.getName(data.output), { @@ -3087,6 +3055,87 @@ FrameGraphId PostProcessManager::blit(FrameGraph& fg, bool tr return ppQuadBlit->output; } +FrameGraphId PostProcessManager::blitDepth(FrameGraph& fg, + FrameGraphId input) noexcept { + auto const& inputDesc = fg.getDescriptor(input); + filament::Viewport const vp = {0, 0, inputDesc.width, inputDesc.height}; + bool const hardwareBlitSupported = + mEngine.getDriverApi().isDepthStencilBlitSupported(inputDesc.format); + + struct BlitData { + FrameGraphId input; + FrameGraphId output; + }; + + if (hardwareBlitSupported) { + auto& depthPass = fg.addPass( + "Depth Blit", 
+ [&](FrameGraph::Builder& builder, auto& data) { + data.input = builder.read(input, FrameGraphTexture::Usage::BLIT_SRC); + + auto desc = builder.getDescriptor(data.input); + desc.levels = 1;// only copy the base level + + // create a new buffer for the copy + data.output = builder.createTexture("depth blit output", desc); + + // output is an attachment + data.output = builder.write(data.output, FrameGraphTexture::Usage::BLIT_DST); + }, + [=](FrameGraphResources const& resources, auto const& data, DriverApi& driver) { + auto const& src = resources.getTexture(data.input); + auto const& dst = resources.getTexture(data.output); + auto const& srcSubDesc = resources.getSubResourceDescriptor(data.input); + auto const& dstSubDesc = resources.getSubResourceDescriptor(data.output); + auto const& desc = resources.getDescriptor(data.output); + assert_invariant(desc.samples == resources.getDescriptor(data.input).samples); + // here we can guarantee that src and dst format and size match, by + // construction. + driver.blit( + dst, dstSubDesc.level, dstSubDesc.layer, { 0, 0 }, + src, srcSubDesc.level, srcSubDesc.layer, { 0, 0 }, + { desc.width, desc.height }); + }); + return depthPass->output; + } + // Otherwise, we would do a shader-based blit. + + auto& ppQuadBlit = fg.addPass( + "Depth Blit (Shader)", + [&](FrameGraph::Builder& builder, auto& data) { + data.input = builder.sample(input); + // Note that this is a same size/format blit. 
+ auto const& outputDesc = inputDesc; + data.output = builder.createTexture("depth blit output", outputDesc); + data.output = + builder.write(data.output, FrameGraphTexture::Usage::DEPTH_ATTACHMENT); + builder.declareRenderPass(builder.getName(data.output), + {.attachments = {.depth = {data.output}}}); + }, + [=](FrameGraphResources const& resources, auto const& data, DriverApi& driver) { + auto depth = resources.getTexture(data.input); + auto const& inputDesc = resources.getDescriptor(data.input); + auto out = resources.getRenderPassInfo(); + + // -------------------------------------------------------------------------------- + // set uniforms + PostProcessMaterial const& material = getPostProcessMaterial("blitDepth"); + auto* mi = material.getMaterialInstance(mEngine); + mi->setParameter("depth", depth, + { + .filterMag = SamplerMagFilter::NEAREST, + .filterMin = SamplerMinFilter::NEAREST, + }); + mi->setParameter("viewport", + float4{float(vp.left) / inputDesc.width, + float(vp.bottom) / inputDesc.height, float(vp.width) / inputDesc.width, + float(vp.height) / inputDesc.height}); + commitAndRender(out, material, driver); + }); + + return ppQuadBlit->output; +} + FrameGraphId PostProcessManager::resolve(FrameGraph& fg, const char* outputBufferName, FrameGraphId input, FrameGraphTexture::Descriptor outDesc) noexcept { @@ -3283,7 +3332,7 @@ FrameGraphId PostProcessManager::debugShadowCascades(FrameGra return debugShadowCascadePass->output; } -FrameGraphId PostProcessManager::debugCombineArrayTexture(FrameGraph& fg, +FrameGraphId PostProcessManager::debugCombineArrayTexture(FrameGraph& fg, bool translucent, FrameGraphId input, filament::Viewport const& vp, FrameGraphTexture::Descriptor const& outDesc, SamplerMagFilter filterMag, diff --git a/filament/src/PostProcessManager.h b/filament/src/PostProcessManager.h index 432a9e3fff3..76f5bcd325f 100644 --- a/filament/src/PostProcessManager.h +++ b/filament/src/PostProcessManager.h @@ -250,13 +250,17 @@ class 
PostProcessManager { FrameGraphTexture::Descriptor const& outDesc, bool translucent); - // upscale/downscale blitter using shaders + // color blitter using shaders FrameGraphId blit(FrameGraph& fg, bool translucent, FrameGraphId input, filament::Viewport const& vp, FrameGraphTexture::Descriptor const& outDesc, backend::SamplerMagFilter filterMag, backend::SamplerMinFilter filterMin) noexcept; + // depth blitter using shaders + FrameGraphId blitDepth(FrameGraph& fg, + FrameGraphId input) noexcept; + // Resolves base level of input and outputs a texture from outDesc. // outDesc with, height, format and samples will be overridden. FrameGraphId resolve(FrameGraph& fg, diff --git a/filament/src/RenderPass.cpp b/filament/src/RenderPass.cpp index 16489cb098c..c091f34113e 100644 --- a/filament/src/RenderPass.cpp +++ b/filament/src/RenderPass.cpp @@ -18,8 +18,8 @@ #include "RenderPrimitive.h" #include "ShadowMap.h" +#include "SharedHandle.h" -#include "details/Camera.h" #include "details/Material.h" #include "details/MaterialInstance.h" #include "details/View.h" @@ -88,24 +88,26 @@ RenderPass RenderPassBuilder::build(FEngine& engine) { // ------------------------------------------------------------------------------------------------ +void RenderPass::BufferObjectHandleDeleter::operator()( + backend::BufferObjectHandle handle) noexcept { + if (handle) { + driver.get().destroyBufferObject(handle); + } +} + +// ------------------------------------------------------------------------------------------------ + RenderPass::RenderPass(FEngine& engine, RenderPassBuilder const& builder) noexcept : mRenderableSoa(*builder.mRenderableSoa), - mVisibleRenderables(builder.mVisibleRenderables), - mUboHandle(builder.mUboHandle), - mCameraPosition(builder.mCameraPosition), - mCameraForwardVector(builder.mCameraForwardVector), - mFlags(builder.mFlags), - mVariant(builder.mVariant), - mVisibilityMask(builder.mVisibilityMask), mScissorViewport(builder.mScissorViewport), 
mCustomCommands(engine.getPerRenderPassArena()) { // compute the number of commands we need updateSummedPrimitiveCounts( - const_cast(mRenderableSoa), mVisibleRenderables); + const_cast(mRenderableSoa), builder.mVisibleRenderables); uint32_t commandCount = - FScene::getPrimitiveCount(mRenderableSoa, mVisibleRenderables.last); + FScene::getPrimitiveCount(mRenderableSoa, builder.mVisibleRenderables.last); const bool colorPass = bool(builder.mCommandTypeFlags & CommandTypeFlags::COLOR); const bool depthPass = bool(builder.mCommandTypeFlags & CommandTypeFlags::DEPTH); commandCount *= uint32_t(colorPass * 2 + depthPass); @@ -129,7 +131,15 @@ RenderPass::RenderPass(FEngine& engine, RenderPassBuilder const& builder) noexce mCommandBegin = curr; mCommandEnd = curr + commandCount + customCommandCount; - appendCommands(engine, { curr, commandCount }, builder.mCommandTypeFlags); + appendCommands(engine, { curr, commandCount }, + builder.mUboHandle, + builder.mVisibleRenderables, + builder.mCommandTypeFlags, + builder.mFlags, + builder.mVisibilityMask, + builder.mVariant, + builder.mCameraPosition, + builder.mCameraForwardVector); if (builder.mCustomCommands.has_value()) { Command* p = curr + commandCount; @@ -147,7 +157,8 @@ RenderPass::RenderPass(FEngine& engine, RenderPassBuilder const& builder) noexce } // this destructor is actually heavy because it inlines ~vector<> -RenderPass::~RenderPass() noexcept = default; +RenderPass::~RenderPass() noexcept { +} void RenderPass::resize(Arena& arena, size_t count) noexcept { if (mCommandBegin) { @@ -157,11 +168,18 @@ void RenderPass::resize(Arena& arena, size_t count) noexcept { } void RenderPass::appendCommands(FEngine& engine, - Slice commands, CommandTypeFlags const commandTypeFlags) noexcept { + Slice commands, + backend::BufferObjectHandle const uboHandle, + utils::Range const vr, + CommandTypeFlags const commandTypeFlags, + RenderFlags const renderFlags, + FScene::VisibleMaskType const visibilityMask, + Variant const variant, 
+ float3 const cameraPosition, + float3 const cameraForwardVector) noexcept { SYSTRACE_CALL(); SYSTRACE_CONTEXT(); - utils::Range const vr = mVisibleRenderables; // trace the number of visible renderables SYSTRACE_VALUE32("visibleRenderables", vr.size()); if (UTILS_UNLIKELY(vr.empty())) { @@ -174,9 +192,6 @@ void RenderPass::appendCommands(FEngine& engine, } JobSystem& js = engine.getJobSystem(); - const RenderFlags renderFlags = mFlags; - const Variant variant = mVariant; - const FScene::VisibleMaskType visibilityMask = mVisibilityMask; // up-to-date summed primitive counts needed for generateCommands() FScene::RenderableSoa const& soa = mRenderableSoa; @@ -186,13 +201,14 @@ void RenderPass::appendCommands(FEngine& engine, auto stereoscopicEyeCount = engine.getConfig().stereoscopicEyeCount; - const float3 cameraPosition(mCameraPosition); - const float3 cameraForwardVector(mCameraForwardVector); - auto work = [commandTypeFlags, curr, &soa, variant, renderFlags, visibilityMask, cameraPosition, - cameraForwardVector, stereoscopicEyeCount] + auto work = [commandTypeFlags, curr, &soa, + boh = uboHandle, + variant, renderFlags, visibilityMask, + cameraPosition, cameraForwardVector, stereoscopicEyeCount] (uint32_t startIndex, uint32_t indexCount) { RenderPass::generateCommands(commandTypeFlags, curr, - soa, { startIndex, startIndex + indexCount }, variant, renderFlags, visibilityMask, + soa, { startIndex, startIndex + indexCount }, boh, + variant, renderFlags, visibilityMask, cameraPosition, cameraForwardVector, stereoscopicEyeCount); }; @@ -204,8 +220,8 @@ void RenderPass::appendCommands(FEngine& engine, js.runAndWait(jobCommandsParallel); } - // always add an "eof" command - // "eof" command. these commands are guaranteed to be sorted last in the + // Always add an "eof" command + // "eof" command. These commands are guaranteed to be sorted last in the // command buffer. 
curr[commandCount - 1].key = uint64_t(Pass::SENTINEL); @@ -213,8 +229,8 @@ // This must be done from the main thread. for (Command const* first = curr, *last = curr + commandCount ; first != last ; ++first) { if (UTILS_LIKELY((first->key & CUSTOM_MASK) == uint64_t(CustomCommand::PASS))) { - auto ma = first->primitive.primitive->getMaterialInstance()->getMaterial(); - ma->prepareProgram(first->primitive.materialVariant); + auto ma = first->info.mi->getMaterial(); + ma->prepareProgram(first->info.materialVariant); } } } @@ -290,19 +306,26 @@ void RenderPass::instanceify(FEngine& engine, Arena& arena) noexcept { while (curr != last) { - // we can't have nice things! No more than maxInstanceCount due to UBO size limits - Command const* const e = std::find_if_not(curr, std::min(last, curr + maxInstanceCount), - [lhs = *curr](Command const& rhs) { - // primitives must be identical to be instanced. Currently, instancing doesn't support - // skinning/morphing. - return lhs.primitive.primitive == rhs.primitive.primitive && - lhs.primitive.rasterState == rhs.primitive.rasterState && - lhs.primitive.skinningHandle == rhs.primitive.skinningHandle && - lhs.primitive.skinningOffset == rhs.primitive.skinningOffset && - lhs.primitive.morphWeightBuffer == rhs.primitive.morphWeightBuffer && - lhs.primitive.morphTargetBuffer == rhs.primitive.morphTargetBuffer && - lhs.primitive.skinningTexture == rhs.primitive.skinningTexture ; - }); + // Currently, if we have skinning or morphing, we can't use auto instancing. This is + because the morphing/skinning data for comparison is not easily accessible. + Additionally, we can't have a different skinning/morphing per instance anyway. + And thirdly, the info.index meaning changes with instancing, it is the index into + the instancing buffer, no longer the index into the soa.
+ Command const* e = curr + 1; + if (UTILS_LIKELY(!curr->info.hasSkinning && !curr->info.hasMorphing)) { + // we can't have nice things! No more than maxInstanceCount due to UBO size limits + e = std::find_if_not(curr, std::min(last, curr + maxInstanceCount), + [lhs = *curr](Command const& rhs) { + // primitives must be identical to be instanced. + // Currently, instancing doesn't support skinning/morphing. + return lhs.info.mi == rhs.info.mi && + lhs.info.rph == rhs.info.rph && + lhs.info.vbih == rhs.info.vbih && + lhs.info.indexOffset == rhs.info.indexOffset && + lhs.info.indexCount == rhs.info.indexCount && + lhs.info.rasterState == rhs.info.rasterState; + }); + } uint32_t const instanceCount = e - curr; assert_invariant(instanceCount > 0); @@ -313,6 +336,15 @@ void RenderPass::instanceify(FEngine& engine, Arena& arena) noexcept { // allocate our staging buffer only if needed if (UTILS_UNLIKELY(!stagingBuffer)) { + + // create a temporary UBO for instancing + size_t const count = mCommandEnd - mCommandBegin; + mInstancedUboHandle = BufferObjectSharedHandle{ + engine.getDriverApi().createBufferObject( + count * sizeof(PerRenderableData) + sizeof(PerRenderableUib), + BufferObjectBinding::UNIFORM, BufferUsage::STATIC), + engine.getDriverApi() }; + // TODO: use stream inline buffer for small sizes // TODO: use a pool for larger heap buffers // buffer large enough for all instances data @@ -326,12 +358,14 @@ void RenderPass::instanceify(FEngine& engine, Arena& arena) noexcept { assert_invariant(instancedPrimitiveOffset + instanceCount <= stagingBufferSize / sizeof(PerRenderableData)); for (uint32_t i = 0; i < instanceCount; i++) { - stagingBuffer[instancedPrimitiveOffset + i] = uboData[curr[i].primitive.index]; + stagingBuffer[instancedPrimitiveOffset + i] = uboData[curr[i].info.index]; } // make the first command instanced - curr[0].primitive.instanceCount = instanceCount; - curr[0].primitive.index = instancedPrimitiveOffset; + curr[0].info.instanceCount = 
instanceCount; + curr[0].info.index = instancedPrimitiveOffset; + curr[0].info.boh = mInstancedUboHandle; + instancedPrimitiveOffset += instanceCount; // cancel commands that are now instances @@ -351,12 +385,6 @@ void RenderPass::instanceify(FEngine& engine, Arena& arena) noexcept { // we have instanced primitives DriverApi& driver = engine.getDriverApi(); - // TODO: maybe use a pool? so we can reuse the buffer. - // create a ubo to hold the instanced primitive data - mInstancedUboHandle = driver.createBufferObject( - sizeof(PerRenderableData) * instancedPrimitiveOffset + sizeof(PerRenderableUib), - BufferObjectBinding::UNIFORM, backend::BufferUsage::STATIC); - // copy our instanced ubo data driver.updateBufferObjectUnsynchronized(mInstancedUboHandle, { stagingBuffer, sizeof(PerRenderableData) * instancedPrimitiveOffset, @@ -409,22 +437,22 @@ void RenderPass::setupColorCommand(Command& cmdDraw, Variant variant, keyDraw |= makeField(ma->getRasterState().alphaToCoverage, BLENDING_MASK, BLENDING_SHIFT); cmdDraw.key = isBlendingCommand ? keyBlending : keyDraw; - cmdDraw.primitive.rasterState = ma->getRasterState(); + cmdDraw.info.rasterState = ma->getRasterState(); // for SSR pass, the blending mode of opaques (including MASKED) must be off // see Material.cpp. const bool blendingMustBeOff = !isBlendingCommand && Variant::isSSRVariant(variant); - cmdDraw.primitive.rasterState.blendFunctionSrcAlpha = blendingMustBeOff ? - BlendFunction::ONE : cmdDraw.primitive.rasterState.blendFunctionSrcAlpha; - cmdDraw.primitive.rasterState.blendFunctionDstAlpha = blendingMustBeOff ? 
- BlendFunction::ZERO : cmdDraw.primitive.rasterState.blendFunctionDstAlpha; - - cmdDraw.primitive.rasterState.inverseFrontFaces = inverseFrontFaces; - cmdDraw.primitive.rasterState.culling = mi->getCullingMode(); - cmdDraw.primitive.rasterState.colorWrite = mi->isColorWriteEnabled(); - cmdDraw.primitive.rasterState.depthWrite = mi->isDepthWriteEnabled(); - cmdDraw.primitive.rasterState.depthFunc = mi->getDepthFunc(); - cmdDraw.primitive.materialVariant = variant; + cmdDraw.info.rasterState.blendFunctionSrcAlpha = blendingMustBeOff ? + BlendFunction::ONE : cmdDraw.info.rasterState.blendFunctionSrcAlpha; + cmdDraw.info.rasterState.blendFunctionDstAlpha = blendingMustBeOff ? + BlendFunction::ZERO : cmdDraw.info.rasterState.blendFunctionDstAlpha; + + cmdDraw.info.rasterState.inverseFrontFaces = inverseFrontFaces; + cmdDraw.info.rasterState.culling = mi->getCullingMode(); + cmdDraw.info.rasterState.colorWrite = mi->isColorWriteEnabled(); + cmdDraw.info.rasterState.depthWrite = mi->isDepthWriteEnabled(); + cmdDraw.info.rasterState.depthFunc = mi->getDepthFunc(); + cmdDraw.info.materialVariant = variant; // we keep "RasterState::colorWrite" to the value set by material (could be disabled) } @@ -432,6 +460,7 @@ void RenderPass::setupColorCommand(Command& cmdDraw, Variant variant, UTILS_NOINLINE void RenderPass::generateCommands(CommandTypeFlags commandTypeFlags, Command* const commands, FScene::RenderableSoa const& soa, Range range, + backend::BufferObjectHandle renderablesUbo, Variant variant, RenderFlags renderFlags, FScene::VisibleMaskType visibilityMask, float3 cameraPosition, float3 cameraForward, uint8_t stereoEyeCount) noexcept { @@ -465,12 +494,14 @@ void RenderPass::generateCommands(CommandTypeFlags commandTypeFlags, Command* co switch (commandTypeFlags & (CommandTypeFlags::COLOR | CommandTypeFlags::DEPTH)) { case CommandTypeFlags::COLOR: curr = generateCommandsImpl(commandTypeFlags, curr, - soa, range, variant, renderFlags, visibilityMask, cameraPosition, 
cameraForward, + soa, range, renderablesUbo, + variant, renderFlags, visibilityMask, cameraPosition, cameraForward, stereoEyeCount); break; case CommandTypeFlags::DEPTH: curr = generateCommandsImpl(commandTypeFlags, curr, - soa, range, variant, renderFlags, visibilityMask, cameraPosition, cameraForward, + soa, range, renderablesUbo, + variant, renderFlags, visibilityMask, cameraPosition, cameraForward, stereoEyeCount); break; default: @@ -493,57 +524,59 @@ UTILS_NOINLINE RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFlags extraFlags, Command* UTILS_RESTRICT curr, FScene::RenderableSoa const& UTILS_RESTRICT soa, Range range, + backend::BufferObjectHandle renderablesUbo, Variant const variant, RenderFlags renderFlags, FScene::VisibleMaskType visibilityMask, float3 cameraPosition, float3 cameraForward, uint8_t stereoEyeCount) noexcept { - // generateCommands() writes both the draw and depth commands simultaneously such that - // we go throw the list of renderables just once. 
- // (in principle, we could have split this method into two, at the cost of going through - // the list twice) - constexpr bool isColorPass = bool(commandTypeFlags & CommandTypeFlags::COLOR); constexpr bool isDepthPass = bool(commandTypeFlags & CommandTypeFlags::DEPTH); - static_assert(isColorPass != isDepthPass, "only color or depth pass supported"); - const bool depthContainsShadowCasters = bool(extraFlags & CommandTypeFlags::DEPTH_CONTAINS_SHADOW_CASTERS); - const bool depthFilterAlphaMaskedObjects = bool(extraFlags & CommandTypeFlags::DEPTH_FILTER_ALPHA_MASKED_OBJECTS); - const bool filterTranslucentObjects = bool(extraFlags & CommandTypeFlags::FILTER_TRANSLUCENT_OBJECTS); + bool const depthContainsShadowCasters = + bool(extraFlags & CommandTypeFlags::DEPTH_CONTAINS_SHADOW_CASTERS); + + bool const depthFilterAlphaMaskedObjects = + bool(extraFlags & CommandTypeFlags::DEPTH_FILTER_ALPHA_MASKED_OBJECTS); - auto const* const UTILS_RESTRICT soaWorldAABBCenter = soa.data(); - auto const* const UTILS_RESTRICT soaVisibility = soa.data(); - auto const* const UTILS_RESTRICT soaPrimitives = soa.data(); - auto const* const UTILS_RESTRICT soaSkinning = soa.data(); - auto const* const UTILS_RESTRICT soaMorphing = soa.data(); - auto const* const UTILS_RESTRICT soaVisibilityMask = soa.data(); - auto const* const UTILS_RESTRICT soaInstanceInfo = soa.data(); + bool const filterTranslucentObjects = + bool(extraFlags & CommandTypeFlags::FILTER_TRANSLUCENT_OBJECTS); - const bool hasShadowing = renderFlags & HAS_SHADOWING; - const bool viewInverseFrontFaces = renderFlags & HAS_INVERSE_FRONT_FACES; - const bool hasInstancedStereo = renderFlags & IS_INSTANCED_STEREOSCOPIC; + bool const hasShadowing = + renderFlags & HAS_SHADOWING; - Command cmdColor; + bool const viewInverseFrontFaces = + renderFlags & HAS_INVERSE_FRONT_FACES; + + bool const hasInstancedStereo = + renderFlags & IS_INSTANCED_STEREOSCOPIC; + + float const cameraPositionDotCameraForward = dot(cameraPosition, 
cameraForward); + + auto const* const UTILS_RESTRICT soaWorldAABBCenter = soa.data(); + auto const* const UTILS_RESTRICT soaVisibility = soa.data(); + auto const* const UTILS_RESTRICT soaPrimitives = soa.data(); + auto const* const UTILS_RESTRICT soaSkinning = soa.data(); + auto const* const UTILS_RESTRICT soaMorphing = soa.data(); + auto const* const UTILS_RESTRICT soaVisibilityMask = soa.data(); + auto const* const UTILS_RESTRICT soaInstanceInfo = soa.data(); + + Command cmd; - Command cmdDepth; if constexpr (isDepthPass) { - cmdDepth.primitive.materialVariant = variant; - cmdDepth.primitive.rasterState = {}; - cmdDepth.primitive.rasterState.colorWrite = Variant::isPickingVariant(variant) || Variant::isVSMVariant(variant); - cmdDepth.primitive.rasterState.depthWrite = true; - cmdDepth.primitive.rasterState.depthFunc = RasterState::DepthFunc::GE; - cmdDepth.primitive.rasterState.alphaToCoverage = false; + cmd.info.materialVariant = variant; + cmd.info.rasterState = {}; + cmd.info.rasterState.colorWrite = Variant::isPickingVariant(variant) || Variant::isVSMVariant(variant); + cmd.info.rasterState.depthWrite = true; + cmd.info.rasterState.depthFunc = RasterState::DepthFunc::GE; + cmd.info.rasterState.alphaToCoverage = false; } - const float cameraPositionDotCameraForward = dot(cameraPosition, cameraForward); - for (uint32_t i = range.first; i < range.last; ++i) { // Check if this renderable passes the visibilityMask. if (UTILS_UNLIKELY(!(soaVisibilityMask[i] & visibilityMask))) { continue; } - Variant renderableVariant = variant; - // Signed distance from camera plane to object's center. Positive distances are in front of // the camera. Some objects with a center behind the camera can still be visible // so their distance will be negative (this happens a lot for the shadow map). 
@@ -558,7 +591,6 @@ RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFla // float3 d = soaWorldAABBCenter[i] - cameraPosition; // float distance = dot(d, cameraForward); // but saves a couple of instruction, because part of the math is done outside the loop. - float distance = dot(soaWorldAABBCenter[i], cameraForward) - cameraPositionDotCameraForward; // We negate the distance to the camera in order to create a bit pattern that will // be sorted properly, this works because: @@ -570,72 +602,59 @@ RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFla // Here, objects close to the camera (but behind) will be drawn first. // An alternative that keeps the mathematical ordering is given here: // distanceBits ^= ((int32_t(distanceBits) >> 31) | 0x80000000u); - distance = -distance; - const uint32_t distanceBits = reinterpret_cast(distance); + float const distance = -dot(soaWorldAABBCenter[i], cameraForward) - cameraPositionDotCameraForward; + uint32_t const distanceBits = reinterpret_cast(distance); // calculate the per-primitive face winding order inversion - const bool inverseFrontFaces = viewInverseFrontFaces ^ soaVisibility[i].reversedWindingOrder; - const bool hasMorphing = soaVisibility[i].morphing; - const bool hasSkinning = soaVisibility[i].skinning; - const bool hasSkinningOrMorphing = hasSkinning || hasMorphing; - - cmdColor.key = makeField(soaVisibility[i].priority, PRIORITY_MASK, PRIORITY_SHIFT); - cmdColor.key |= makeField(soaVisibility[i].channel, CHANNEL_MASK, CHANNEL_SHIFT); - cmdColor.primitive.index = i; - cmdColor.primitive.instanceCount = - soaInstanceInfo[i].count | PrimitiveInfo::USER_INSTANCE_MASK; - cmdColor.primitive.instanceBufferHandle = soaInstanceInfo[i].handle; - - // soaInstanceInfo[i].count is the number of instances the user has requested, either for - // manual or hybrid instancing. Instanced stereo multiplies the number of instances by the - // eye count. 
- if (UTILS_UNLIKELY(hasInstancedStereo)) { - cmdColor.primitive.instanceCount = - (soaInstanceInfo[i].count * stereoEyeCount) | - PrimitiveInfo::USER_INSTANCE_MASK; - } + bool const inverseFrontFaces = viewInverseFrontFaces ^ soaVisibility[i].reversedWindingOrder; + bool const hasMorphing = soaVisibility[i].morphing; + bool const hasSkinning = soaVisibility[i].skinning; + bool const hasSkinningOrMorphing = hasSkinning || hasMorphing; // if we are already an SSR variant, the SRE bit is already set, // there is no harm setting it again static_assert(Variant::SPECIAL_SSR & Variant::SRE); + Variant renderableVariant = variant; renderableVariant.setShadowReceiver( Variant::isSSRVariant(variant) || (soaVisibility[i].receiveShadows & hasShadowing)); renderableVariant.setSkinning(hasSkinningOrMorphing); - const FRenderableManager::SkinningBindingInfo& skinning = soaSkinning[i]; - const FRenderableManager::MorphingBindingInfo& morphing = soaMorphing[i]; - - if constexpr (isDepthPass) { - cmdDepth.key = uint64_t(Pass::DEPTH); - cmdDepth.key |= uint64_t(CustomCommand::PASS); - cmdDepth.key |= makeField(soaVisibility[i].priority, PRIORITY_MASK, PRIORITY_SHIFT); - cmdDepth.key |= makeField(soaVisibility[i].channel, CHANNEL_MASK, CHANNEL_SHIFT); - cmdDepth.key |= makeField(distanceBits >> 22u, Z_BUCKET_MASK, Z_BUCKET_SHIFT); - cmdDepth.primitive.index = i; - cmdDepth.primitive.instanceCount = - soaInstanceInfo[i].count | PrimitiveInfo::USER_INSTANCE_MASK; - cmdDepth.primitive.instanceBufferHandle = soaInstanceInfo[i].handle; - cmdDepth.primitive.materialVariant.setSkinning(hasSkinningOrMorphing); - cmdDepth.primitive.rasterState.inverseFrontFaces = inverseFrontFaces; - - cmdDepth.primitive.skinningHandle = skinning.handle; - cmdDepth.primitive.skinningOffset = skinning.offset; - cmdDepth.primitive.skinningTexture = skinning.handleSampler; - cmdDepth.primitive.morphWeightBuffer = morphing.handle; - - if (UTILS_UNLIKELY(hasInstancedStereo)) { - cmdColor.primitive.instanceCount = 
- (soaInstanceInfo[i].count * stereoEyeCount) | - PrimitiveInfo::USER_INSTANCE_MASK; - } - } + FRenderableManager::SkinningBindingInfo const& skinning = soaSkinning[i]; + FRenderableManager::MorphingBindingInfo const& morphing = soaMorphing[i]; + if constexpr (isColorPass) { renderableVariant.setFog(soaVisibility[i].fog && Variant::isFogVariant(variant)); + cmd.key = uint64_t(Pass::COLOR); + } else if constexpr (isDepthPass) { + cmd.key = uint64_t(Pass::DEPTH); + cmd.key |= uint64_t(CustomCommand::PASS); + cmd.key |= makeField(distanceBits >> 22u, Z_BUCKET_MASK, Z_BUCKET_SHIFT); + cmd.info.materialVariant.setSkinning(hasSkinningOrMorphing); + cmd.info.rasterState.inverseFrontFaces = inverseFrontFaces; + } + + cmd.key |= makeField(soaVisibility[i].priority, PRIORITY_MASK, PRIORITY_SHIFT); + cmd.key |= makeField(soaVisibility[i].channel, CHANNEL_MASK, CHANNEL_SHIFT); + cmd.info.index = i; + cmd.info.hasHybridInstancing = (bool)soaInstanceInfo[i].handle; + cmd.info.instanceCount = soaInstanceInfo[i].count; + cmd.info.hasMorphing = (bool)morphing.handle; + cmd.info.hasSkinning = (bool)skinning.handle; - cmdColor.primitive.skinningHandle = skinning.handle; - cmdColor.primitive.skinningOffset = skinning.offset; - cmdColor.primitive.skinningTexture = skinning.handleSampler; - cmdColor.primitive.morphWeightBuffer = morphing.handle; + // soaInstanceInfo[i].count is the number of instances the user has requested, either for + // manual or hybrid instancing. Instanced stereo multiplies the number of instances by the + // eye count. 
+ if (hasInstancedStereo) { + cmd.info.instanceCount *= stereoEyeCount; + } + + if (cmd.info.hasHybridInstancing) { + // with hybrid instancing, we already know which UBO to use + cmd.info.boh = soaInstanceInfo[i].handle; + } else { + // with no- or user- instancing, we can only know after instanceify() + assert_invariant(cmd.info.instanceCount <= 1); + cmd.info.boh = renderablesUbo; } const bool shadowCaster = soaVisibility[i].castShadows & hasShadowing; @@ -655,13 +674,17 @@ RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFla // TODO: we should disable the SKN variant if this primitive doesn't have either // skinning or morphing. - if constexpr (isColorPass) { - cmdColor.primitive.primitive = &primitive; - RenderPass::setupColorCommand(cmdColor, renderableVariant, mi, inverseFrontFaces); - - cmdColor.primitive.morphTargetBuffer = morphTargets.buffer->getHwHandle(); + cmd.info.mi = mi; + cmd.info.rph = primitive.getHwHandle(); + cmd.info.vbih = primitive.getVertexBufferInfoHandle(); + cmd.info.indexOffset = primitive.getIndexOffset(); + cmd.info.indexCount = primitive.getIndexCount(); + cmd.info.type = primitive.getPrimitiveType(); + cmd.info.morphTargetBuffer = morphTargets.buffer->getHwHandle(); - const bool blendPass = Pass(cmdColor.key & PASS_MASK) == Pass::BLENDED; + if constexpr (isColorPass) { + RenderPass::setupColorCommand(cmd, renderableVariant, mi, inverseFrontFaces); + const bool blendPass = Pass(cmd.key & PASS_MASK) == Pass::BLENDED; if (blendPass) { // TODO: at least for transparent objects, AABB should be per primitive // but that would break the "local" blend-order, which relies on @@ -669,16 +692,16 @@ RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFla // blend pass: // This will sort back-to-front for blended, and honor explicit ordering // for a given Z value, or globally. 
- cmdColor.key &= ~BLEND_ORDER_MASK; - cmdColor.key &= ~BLEND_DISTANCE_MASK; + cmd.key &= ~BLEND_ORDER_MASK; + cmd.key &= ~BLEND_DISTANCE_MASK; // write the distance - cmdColor.key |= makeField(~distanceBits, + cmd.key |= makeField(~distanceBits, BLEND_DISTANCE_MASK, BLEND_DISTANCE_SHIFT); // clear the distance if global ordering is enabled - cmdColor.key &= ~select(primitive.isGlobalBlendOrderEnabled(), + cmd.key &= ~select(primitive.isGlobalBlendOrderEnabled(), BLEND_DISTANCE_MASK); // write blend order - cmdColor.key |= makeField(primitive.getBlendOrder(), + cmd.key |= makeField(primitive.getBlendOrder(), BLEND_ORDER_MASK, BLEND_ORDER_SHIFT); @@ -695,11 +718,11 @@ RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFla // In this mode, we override the user's culling mode. // TWO_PASSES_TWO_SIDES: this command will be issued 2nd, draw front faces - cmdColor.primitive.rasterState.culling = + cmd.info.rasterState.culling = (mode == TransparencyMode::TWO_PASSES_TWO_SIDES) ? - CullingMode::BACK : cmdColor.primitive.rasterState.culling; + CullingMode::BACK : cmd.info.rasterState.culling; - uint64_t key = cmdColor.key; + uint64_t key = cmd.key; // draw this command AFTER THE NEXT ONE key |= makeField(1, BLEND_TWO_PASS_MASK, BLEND_TWO_PASS_SHIFT); @@ -713,70 +736,51 @@ RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFla // cancel command if both front and back faces are culled key |= select(mi->getCullingMode() == CullingMode::FRONT_AND_BACK); - *curr = cmdColor; + *curr = cmd; curr->key = key; ++curr; // TWO_PASSES_TWO_SIDES: this command will be issued first, draw back sides (i.e. cull front) - cmdColor.primitive.rasterState.culling = + cmd.info.rasterState.culling = (mode == TransparencyMode::TWO_PASSES_TWO_SIDES) ? 
- CullingMode::FRONT : cmdColor.primitive.rasterState.culling; + CullingMode::FRONT : cmd.info.rasterState.culling; // TWO_PASSES_ONE_SIDE: this command will be issued first, draw (back side) in depth buffer only - cmdColor.primitive.rasterState.depthWrite |= select(mode == TransparencyMode::TWO_PASSES_ONE_SIDE); - cmdColor.primitive.rasterState.colorWrite &= ~select(mode == TransparencyMode::TWO_PASSES_ONE_SIDE); - cmdColor.primitive.rasterState.depthFunc = + cmd.info.rasterState.depthWrite |= select(mode == TransparencyMode::TWO_PASSES_ONE_SIDE); + cmd.info.rasterState.colorWrite &= ~select(mode == TransparencyMode::TWO_PASSES_ONE_SIDE); + cmd.info.rasterState.depthFunc = (mode == TransparencyMode::TWO_PASSES_ONE_SIDE) ? - SamplerCompareFunc::GE : cmdColor.primitive.rasterState.depthFunc; - + SamplerCompareFunc::GE : cmd.info.rasterState.depthFunc; } else { // color pass: // This will bucket objects by Z, front-to-back and then sort by material // in each buckets. We use the top 10 bits of the distance, which // bucketizes the depth by its log2 and in 4 linear chunks in each bucket. - cmdColor.key &= ~Z_BUCKET_MASK; - cmdColor.key |= makeField(distanceBits >> 22u, Z_BUCKET_MASK, Z_BUCKET_SHIFT); + cmd.key &= ~Z_BUCKET_MASK; + cmd.key |= makeField(distanceBits >> 22u, Z_BUCKET_MASK, Z_BUCKET_SHIFT); } - - *curr = cmdColor; - - // cancel command if both front and back faces are culled - curr->key |= select(mi->getCullingMode() == CullingMode::FRONT_AND_BACK); - - ++curr; - } - - if constexpr (isDepthPass) { + } else if constexpr (isDepthPass) { const RasterState rs = ma->getRasterState(); const TransparencyMode mode = mi->getTransparencyMode(); const BlendingMode blendingMode = ma->getBlendingMode(); const bool translucent = (blendingMode != BlendingMode::OPAQUE && blendingMode != BlendingMode::MASKED); - // TODO: we should disable the SKN variant if this primitive doesn't have either - // skinning or morphing. 
- - cmdDepth.key |= mi->getSortingKey(); // already all set-up for direct or'ing - - // unconditionally write the command - cmdDepth.primitive.primitive = &primitive; - cmdDepth.primitive.rasterState.culling = mi->getCullingMode(); - cmdDepth.primitive.morphTargetBuffer = morphTargets.buffer->getHwHandle(); + cmd.key |= mi->getSortingKey(); // already all set-up for direct or'ing + cmd.info.rasterState.culling = mi->getCullingMode(); // FIXME: should writeDepthForShadowCasters take precedence over mi->getDepthWrite()? - cmdDepth.primitive.rasterState.depthWrite = (1 // only keep bit 0 + cmd.info.rasterState.depthWrite = (1 // only keep bit 0 & (mi->isDepthWriteEnabled() | (mode == TransparencyMode::TWO_PASSES_ONE_SIDE)) - & !(filterTranslucentObjects & translucent) - & !(depthFilterAlphaMaskedObjects & rs.alphaToCoverage)) - | writeDepthForShadowCasters; - - *curr = cmdDepth; - - // cancel command if both front and back faces are culled - curr->key |= select(mi->getCullingMode() == CullingMode::FRONT_AND_BACK); - - ++curr; + & !(filterTranslucentObjects & translucent) + & !(depthFilterAlphaMaskedObjects & rs.alphaToCoverage)) + | writeDepthForShadowCasters; } + + *curr = cmd; + // cancel command if both front and back faces are culled + curr->key |= select(mi->getCullingMode() == CullingMode::FRONT_AND_BACK); + ++curr; } } return curr; @@ -915,20 +919,20 @@ void RenderPass::Executor::execute(FEngine& engine, } // primitiveHandle may be invalid if no geometry was set on the renderable. 
- if (UTILS_UNLIKELY(!first->primitive.primitive->getHwHandle())) { + if (UTILS_UNLIKELY(!first->info.rph)) { continue; } // per-renderable uniform - PrimitiveInfo const info = first->primitive; + PrimitiveInfo const info = first->info; pipeline.rasterState = info.rasterState; - pipeline.vertexBufferInfo = info.primitive->getVertexBufferInfoHandle(); - pipeline.primitiveType = info.primitive->getPrimitiveType(); + pipeline.vertexBufferInfo = info.vbih; + pipeline.primitiveType = info.type; assert_invariant(pipeline.vertexBufferInfo); - if (UTILS_UNLIKELY(mi != info.primitive->getMaterialInstance())) { + if (UTILS_UNLIKELY(mi != info.mi)) { // this is always taken the first time - mi = info.primitive->getMaterialInstance(); + mi = info.mi; assert_invariant(mi); ma = mi->getMaterial(); @@ -956,47 +960,33 @@ void RenderPass::Executor::execute(FEngine& engine, assert_invariant(ma); pipeline.program = ma->getProgram(info.materialVariant); - uint16_t const instanceCount = - info.instanceCount & PrimitiveInfo::INSTANCE_COUNT_MASK; - auto getPerObjectUboHandle = - [this, &info, &instanceCount]() -> std::pair, uint32_t> { - if (info.instanceBufferHandle) { - // "hybrid" instancing -- instanceBufferHandle takes the place of the UBO - return { info.instanceBufferHandle, 0 }; - } - bool const userInstancing = - (info.instanceCount & PrimitiveInfo::USER_INSTANCE_MASK) != 0u; - if (!userInstancing && instanceCount > 1) { - // automatic instancing - return { - mInstancedUboHandle, - info.index * sizeof(PerRenderableData) }; - } else { - // manual instancing - return { mUboHandle, info.index * sizeof(PerRenderableData) }; - } - }; - // Bind per-renderable uniform block. There is no need to attempt to skip this command // because the backends already do this. - auto const [perObjectUboHandle, offset] = getPerObjectUboHandle(); - assert_invariant(perObjectUboHandle); + size_t const offset = info.hasHybridInstancing ? 
+ 0 : info.index * sizeof(PerRenderableData); + + assert_invariant(info.boh); + driver.bindBufferRange(BufferObjectBinding::UNIFORM, +UniformBindingPoints::PER_RENDERABLE, - perObjectUboHandle, - offset, - sizeof(PerRenderableUib)); + info.boh, offset, sizeof(PerRenderableUib)); + + if (UTILS_UNLIKELY(info.hasSkinning)) { + + FScene::RenderableSoa const& soa = *mRenderableSoa; + + const FRenderableManager::SkinningBindingInfo& skinning = + soa.elementAt(info.index); - if (UTILS_UNLIKELY(info.skinningHandle)) { // note: we can't bind less than sizeof(PerRenderableBoneUib) due to glsl limitations driver.bindBufferRange(BufferObjectBinding::UNIFORM, +UniformBindingPoints::PER_RENDERABLE_BONES, - info.skinningHandle, - info.skinningOffset * sizeof(PerRenderableBoneUib::BoneData), + skinning.handle, + skinning.offset * sizeof(PerRenderableBoneUib::BoneData), sizeof(PerRenderableBoneUib)); // note: always bind the skinningTexture because the shader needs it. driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_SKINNING, - info.skinningTexture); + skinning.handleSampler); // note: even if only skinning is enabled, binding morphTargetBuffer is needed. driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_MORPHING, info.morphTargetBuffer); @@ -1006,16 +996,25 @@ void RenderPass::Executor::execute(FEngine& engine, rebindPipeline = true; } - if (UTILS_UNLIKELY(info.morphWeightBuffer)) { + if (UTILS_UNLIKELY(info.hasMorphing)) { + + FScene::RenderableSoa const& soa = *mRenderableSoa; + + const FRenderableManager::SkinningBindingInfo& skinning = + soa.elementAt(info.index); + + const FRenderableManager::MorphingBindingInfo& morphing = + soa.elementAt(info.index); + // Instead of using a UBO per primitive, we could also have a single UBO for all // primitives and use bindUniformBufferRange which might be more efficient. 
driver.bindUniformBuffer(+UniformBindingPoints::PER_RENDERABLE_MORPHING, - info.morphWeightBuffer); + morphing.handle); driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_MORPHING, info.morphTargetBuffer); // note: even if only morphing is enabled, binding skinningTexture is needed. driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_SKINNING, - info.skinningTexture); + skinning.handleSampler); // FIXME: Currently we need to rebind the PipelineState when texture or // UBO binding change. @@ -1029,15 +1028,12 @@ void RenderPass::Executor::execute(FEngine& engine, driver.bindPipeline(pipeline); } - if (info.primitive->getHwHandle() != currentPrimitiveHandle) { - currentPrimitiveHandle = info.primitive->getHwHandle(); - driver.bindRenderPrimitive(info.primitive->getHwHandle()); + if (info.rph != currentPrimitiveHandle) { + currentPrimitiveHandle = info.rph; + driver.bindRenderPrimitive(info.rph); } - driver.draw2( - info.primitive->getIndexOffset(), - info.primitive->getIndexCount(), - instanceCount); + driver.draw2(info.indexOffset, info.indexCount, info.instanceCount); } } @@ -1047,20 +1043,16 @@ void RenderPass::Executor::execute(FEngine& engine, engine.flush(); } } - - if (mInstancedUboHandle) { - driver.destroyBufferObject(mInstancedUboHandle); - } - } // ------------------------------------------------------------------------------------------------ -RenderPass::Executor::Executor(RenderPass const* pass, Command const* b, Command const* e) noexcept - : mCommands(b, e), +RenderPass::Executor::Executor(RenderPass const* pass, Command const* b, Command const* e, + BufferObjectSharedHandle instancedUbo) noexcept + : mRenderableSoa(&pass->mRenderableSoa), + mCommands(b, e), mCustomCommands(pass->mCustomCommands.data(), pass->mCustomCommands.size()), - mUboHandle(pass->mUboHandle), - mInstancedUboHandle(pass->mInstancedUboHandle), + mInstancedUboHandle(std::move(instancedUbo)), mScissorViewport(pass->mScissorViewport), mPolygonOffsetOverride(false), 
mScissorOverride(false) { @@ -1068,7 +1060,14 @@ RenderPass::Executor::Executor(RenderPass const* pass, Command const* b, Command assert_invariant(e <= pass->end()); } -RenderPass::Executor::Executor(Executor const& rhs) = default; +RenderPass::Executor::Executor() noexcept + : mPolygonOffsetOverride(false), + mScissorOverride(false) { +} + +RenderPass::Executor::Executor(Executor&& rhs) noexcept = default; + +RenderPass::Executor& RenderPass::Executor::operator=(Executor&& rhs) noexcept = default; // this destructor is actually heavy because it inlines ~vector<> RenderPass::Executor::~Executor() noexcept = default; diff --git a/filament/src/RenderPass.h b/filament/src/RenderPass.h index fe6e04cafc9..3ab81d02e4b 100644 --- a/filament/src/RenderPass.h +++ b/filament/src/RenderPass.h @@ -19,16 +19,19 @@ #include "Allocators.h" +#include "SharedHandle.h" + #include "details/Camera.h" #include "details/Scene.h" #include "private/filament/Variant.h" -#include "utils/BitmaskEnum.h" +#include #include #include #include +#include #include #include #include @@ -238,31 +241,33 @@ class RenderPass { struct PrimitiveInfo { // 56 bytes union { - FRenderPrimitive const* primitive; // 8 bytes; - uint64_t padding = {}; // ensures primitive is 8 bytes on all archs - }; // 8 bytes - uint64_t rfu0; // 8 bytes - backend::RasterState rasterState; // 4 bytes - backend::Handle skinningHandle; // 4 bytes - backend::Handle skinningTexture; // 4 bytes - backend::Handle morphWeightBuffer; // 4 bytes - backend::Handle morphTargetBuffer; // 4 bytes - backend::Handle instanceBufferHandle; // 4 bytes + FMaterialInstance const* mi; + uint64_t padding; // make this field 64 bits on all platforms + }; + backend::RenderPrimitiveHandle rph; // 4 bytes + backend::VertexBufferInfoHandle vbih; // 4 bytes + backend::BufferObjectHandle boh; // 4 bytes + uint32_t indexOffset; // 4 bytes + uint32_t indexCount; // 4 bytes uint32_t index = 0; // 4 bytes - uint32_t skinningOffset = 0; // 4 bytes + 
backend::SamplerGroupHandle morphTargetBuffer; // 4 bytes + + backend::RasterState rasterState; // 4 bytes + uint16_t instanceCount; // 2 bytes [MSb: user] Variant materialVariant; // 1 byte - uint8_t rfu1; // 1 byte - uint32_t rfu2; // 4 byte + backend::PrimitiveType type : 3; // 1 byte 3 bits + bool hasSkinning : 1; // 1 bit + bool hasMorphing : 1; // 1 bit + bool hasHybridInstancing : 1; // 1 bit - static const uint16_t USER_INSTANCE_MASK = 0x8000u; - static const uint16_t INSTANCE_COUNT_MASK = 0x7fffu; + uint32_t rfu[3]; // 16 bytes }; static_assert(sizeof(PrimitiveInfo) == 56); struct alignas(8) Command { // 64 bytes CommandKey key = 0; // 8 bytes - PrimitiveInfo primitive; // 56 bytes + PrimitiveInfo info; // 56 bytes bool operator < (Command const& rhs) const noexcept { return key < rhs.key; } // placement new declared as "throw" to avoid the compiler's null-check inline void* operator new (size_t, void* ptr) { @@ -288,11 +293,11 @@ class RenderPass { // RenderPass can only be moved RenderPass(RenderPass&& rhs) = default; + RenderPass& operator=(RenderPass&& rhs) = delete; // could be supported if needed // RenderPass can't be copied RenderPass(RenderPass const& rhs) = delete; RenderPass& operator=(RenderPass const& rhs) = delete; - RenderPass& operator=(RenderPass&& rhs) = delete; // allocated commands ARE NOT freed, they're owned by the Arena ~RenderPass() noexcept; @@ -307,6 +312,17 @@ class RenderPass { backend::Handle renderTarget, backend::RenderPassParams params) noexcept; + + class BufferObjectHandleDeleter { + std::reference_wrapper driver; + public: + explicit BufferObjectHandleDeleter(backend::DriverApi& driver) noexcept : driver(driver) { } + void operator()(backend::BufferObjectHandle handle) noexcept; + }; + + using BufferObjectSharedHandle = SharedHandle< + backend::HwBufferObject, BufferObjectHandleDeleter>; + /* * Executor holds the range of commands to execute for a given pass */ @@ -316,10 +332,10 @@ class RenderPass { friend class 
RenderPassBuilder; // these fields are constant after creation + FScene::RenderableSoa const* mRenderableSoa = nullptr; utils::Slice mCommands; utils::Slice mCustomCommands; - backend::Handle mUboHandle; - backend::Handle mInstancedUboHandle; + BufferObjectSharedHandle mInstancedUboHandle; backend::Viewport mScissorViewport; backend::Viewport mScissor{}; // value of scissor override @@ -327,7 +343,8 @@ class RenderPass { bool mPolygonOffsetOverride : 1; // whether to override the polygon offset setting bool mScissorOverride : 1; // whether to override the polygon offset setting - Executor(RenderPass const* pass, Command const* b, Command const* e) noexcept; + Executor(RenderPass const* pass, Command const* b, Command const* e, + BufferObjectSharedHandle instancedUbo) noexcept; void execute(FEngine& engine, const Command* first, const Command* last) const noexcept; @@ -336,9 +353,16 @@ class RenderPass { backend::Viewport const& scissor) noexcept; public: - Executor() = default; - Executor(Executor const& rhs); - Executor& operator=(Executor const& rhs) = default; + Executor() noexcept; + + // can't be copied + Executor(Executor const& rhs) noexcept = delete; + Executor& operator=(Executor const& rhs) noexcept = delete; + + // can be moved + Executor(Executor&& rhs) noexcept; + Executor& operator=(Executor&& rhs) noexcept; + ~Executor() noexcept; // if non-null, overrides the material's polygon offset @@ -353,11 +377,11 @@ class RenderPass { // returns a new executor for this pass Executor getExecutor() const { - return { this, mCommandBegin, mCommandEnd }; + return getExecutor(mCommandBegin, mCommandEnd); } Executor getExecutor(Command const* b, Command const* e) const { - return { this, b, e }; + return { this, b, e, mInstancedUboHandle }; } private: @@ -368,7 +392,15 @@ class RenderPass { // This is the main function of this class, this appends commands to the pass using // the current camera, geometry and flags set. This can be called multiple times if needed. 
void appendCommands(FEngine& engine, - utils::Slice commands, CommandTypeFlags commandTypeFlags) noexcept; + utils::Slice commands, + backend::BufferObjectHandle uboHandle, + utils::Range const visibleRenderables, + CommandTypeFlags commandTypeFlags, + RenderFlags renderFlags, + FScene::VisibleMaskType visibilityMask, + Variant variant, + math::float3 cameraPosition, + math::float3 cameraForwardVector) noexcept; // Appends a custom command. void appendCustomCommand(Command* commands, @@ -394,6 +426,7 @@ class RenderPass { static inline void generateCommands(CommandTypeFlags commandTypeFlags, Command* commands, FScene::RenderableSoa const& soa, utils::Range range, + backend::BufferObjectHandle renderablesUbo, Variant variant, RenderFlags renderFlags, FScene::VisibleMaskType visibilityMask, math::float3 cameraPosition, math::float3 cameraForward, @@ -402,6 +435,7 @@ class RenderPass { template static inline Command* generateCommandsImpl(RenderPass::CommandTypeFlags extraFlags, Command* curr, FScene::RenderableSoa const& soa, utils::Range range, + backend::BufferObjectHandle renderablesUbo, Variant variant, RenderFlags renderFlags, FScene::VisibleMaskType visibilityMask, math::float3 cameraPosition, math::float3 cameraForward, uint8_t instancedStereoEyeCount) noexcept; @@ -412,23 +446,12 @@ class RenderPass { static void updateSummedPrimitiveCounts( FScene::RenderableSoa& renderableData, utils::Range vr) noexcept; - FScene::RenderableSoa const& mRenderableSoa; - utils::Range const mVisibleRenderables; - backend::Handle const mUboHandle; - math::float3 const mCameraPosition; - math::float3 const mCameraForwardVector; - RenderFlags const mFlags; - Variant const mVariant; - FScene::VisibleMaskType const mVisibilityMask; backend::Viewport const mScissorViewport; - - // Pointer to the first command - Command* mCommandBegin = nullptr; - // Pointer to one past the last command - Command* mCommandEnd = nullptr; + Command* mCommandBegin = nullptr; // Pointer to the first 
command + Command* mCommandEnd = nullptr; // Pointer to one past the last command // a UBO for instanced primitives - backend::Handle mInstancedUboHandle; + BufferObjectSharedHandle mInstancedUboHandle; // a vector for our custom commands using CustomCommandVector = std::vector>; diff --git a/filament/src/RendererUtils.cpp b/filament/src/RendererUtils.cpp index 3e1cafdbc9d..e5b29567fe4 100644 --- a/filament/src/RendererUtils.cpp +++ b/filament/src/RendererUtils.cpp @@ -54,7 +54,7 @@ FrameGraphId RendererUtils::colorPass( FrameGraph& fg, const char* name, FEngine& engine, FView const& view, FrameGraphTexture::Descriptor const& colorBufferDesc, ColorPassConfig const& config, PostProcessManager::ColorGradingConfig colorGradingConfig, - RenderPass::Executor const& passExecutor) noexcept { + RenderPass::Executor passExecutor) noexcept { struct ColorPassData { FrameGraphId shadows; @@ -200,7 +200,7 @@ FrameGraphId RendererUtils::colorPass( .clearFlags = clearColorFlags | clearDepthFlags | clearStencilFlags}); blackboard["depth"] = data.depth; }, - [=, &view, &engine](FrameGraphResources const& resources, + [=, passExecutor = std::move(passExecutor), &view, &engine](FrameGraphResources const& resources, ColorPassData const& data, DriverApi& driver) { auto out = resources.getRenderPassInfo(); diff --git a/filament/src/RendererUtils.h b/filament/src/RendererUtils.h index 0a5f7b2808b..9b7e93cbc60 100644 --- a/filament/src/RendererUtils.h +++ b/filament/src/RendererUtils.h @@ -76,7 +76,7 @@ class RendererUtils { FrameGraphTexture::Descriptor const& colorBufferDesc, ColorPassConfig const& config, PostProcessManager::ColorGradingConfig colorGradingConfig, - RenderPass::Executor const& passExecutor) noexcept; + RenderPass::Executor passExecutor) noexcept; static std::pair, bool> refractionPass( FrameGraph& fg, FEngine& engine, FView const& view, diff --git a/filament/src/ShadowMapManager.cpp b/filament/src/ShadowMapManager.cpp index 4f971450fbe..8943371855c 100644 --- 
a/filament/src/ShadowMapManager.cpp +++ b/filament/src/ShadowMapManager.cpp @@ -365,7 +365,8 @@ FrameGraphId ShadowMapManager::render(FEngine& engine, FrameG .camera(cameraInfo) .visibilityMask(entry.visibilityMask) .geometry(scene->getRenderableData(), - entry.range, scene->getRenderableUBO()) + entry.range, + view.getRenderableUBO()) .commandTypeFlags(RenderPass::CommandTypeFlags::SHADOW) .build(engine); diff --git a/filament/src/SharedHandle.h b/filament/src/SharedHandle.h new file mode 100644 index 00000000000..8d793f31e43 --- /dev/null +++ b/filament/src/SharedHandle.h @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2024 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TNT_FILAMENT_SHARED_HANDLE_H +#define TNT_FILAMENT_SHARED_HANDLE_H + +#include + +#include + +namespace filament { + +/* + * SharedHandle is a bit like shared_ptr<> but for Handle<>, the destruction is + * performed by a Deleter functor that needs to be provided. We only support strong + * references for now. + * + * caveat: The current implementation is not thread-safe. 
+ */ +template +struct SharedHandle { + SharedHandle() noexcept = default; + + ~SharedHandle() noexcept { + dec(mControlBlockPtr); + } + + SharedHandle(SharedHandle const& rhs) noexcept + : mControlBlockPtr(inc(rhs.mControlBlockPtr)) { + } + + SharedHandle(SharedHandle&& rhs) noexcept { + std::swap(mControlBlockPtr, rhs.mControlBlockPtr); + } + + SharedHandle& operator=(SharedHandle const& rhs) noexcept { + if (this != &rhs) { + inc(rhs.mControlBlockPtr); // add a reference to other control block + dec(mControlBlockPtr); // drop a reference from ours (possibly destroying it) + mControlBlockPtr = rhs.mControlBlockPtr; // adopt the new control block + } + return *this; + } + + SharedHandle& operator=(SharedHandle&& rhs) noexcept { + if (this != &rhs) { + std::swap(mControlBlockPtr, rhs.mControlBlockPtr); + } + return *this; + } + + // initialize the SharedHandle and provide a Deleter + template + explicit SharedHandle(backend::Handle const& rhs, ARGS&& ... args) noexcept + : mControlBlockPtr(new ControlBlock(rhs, std::forward(args)...)) { + } + + // initialize the SharedHandle and provide a Deleter + template + explicit SharedHandle(backend::Handle&& rhs, ARGS&& ... args) noexcept + : mControlBlockPtr(new ControlBlock(rhs, std::forward(args)...)) { + } + + // automatically converts to Handle + operator backend::Handle() const noexcept { // NOLINT(*-explicit-constructor) + return mControlBlockPtr ? mControlBlockPtr->handle : backend::Handle{}; + } + + explicit operator bool() const noexcept { + return mControlBlockPtr ? (bool)mControlBlockPtr->handle : false; + } + + void clear() noexcept { dec(mControlBlockPtr); } + +private: + struct ControlBlock { + template + explicit ControlBlock(backend::Handle handle, ARGS&& ... 
args) noexcept + : deleter(std::forward(args)...), handle(std::move(handle)) { + } + void inc() noexcept { + ++count; + } + void dec() noexcept { + if (--count == 0) { + deleter(handle); + delete this; + } + } + Deleter deleter; + int32_t count = 1; + backend::Handle handle; + }; + + ControlBlock* inc(ControlBlock* const ctrlBlk) noexcept { + if (ctrlBlk) { + ctrlBlk->inc(); + } + return ctrlBlk; + } + + void dec(ControlBlock* const ctrlBlk) noexcept { + if (ctrlBlk) { + ctrlBlk->dec(); + } + } + + ControlBlock* mControlBlockPtr = nullptr; +}; + +} // namespace filament + +#endif // TNT_FILAMENT_SHARED_HANDLE_H diff --git a/filament/src/SwapChain.cpp b/filament/src/SwapChain.cpp index a242ef06ccb..dd7db7bd011 100644 --- a/filament/src/SwapChain.cpp +++ b/filament/src/SwapChain.cpp @@ -28,12 +28,13 @@ void* SwapChain::getNativeWindow() const noexcept { return downcast(this)->getNativeWindow(); } -void SwapChain::setFrameScheduledCallback(FrameScheduledCallback callback, void* user) { - downcast(this)->setFrameScheduledCallback(callback, user); +void SwapChain::setFrameScheduledCallback( + backend::CallbackHandler* handler, FrameScheduledCallback&& callback) { + downcast(this)->setFrameScheduledCallback(handler, std::move(callback)); } -SwapChain::FrameScheduledCallback SwapChain::getFrameScheduledCallback() const noexcept { - return downcast(this)->getFrameScheduledCallback(); +bool SwapChain::isFrameScheduledCallbackSet() const noexcept { + return downcast(this)->isFrameScheduledCallbackSet(); } void SwapChain::setFrameCompletedCallback(backend::CallbackHandler* handler, diff --git a/filament/src/components/RenderableManager.cpp b/filament/src/components/RenderableManager.cpp index 3bbd85597ce..2cd031059b3 100644 --- a/filament/src/components/RenderableManager.cpp +++ b/filament/src/components/RenderableManager.cpp @@ -26,15 +26,40 @@ #include "details/InstanceBuffer.h" #include "details/Material.h" -#include "filament/RenderableManager.h" +#include +#include 
+#include +#include +#include +#include #include +#include +#include +#include +#include +#include #include +#include #include -#include +#include + +#include +#include +#include +#include + +#include +#include #include +#include +#include + +#include +#include +#include using namespace filament::math; using namespace utils; diff --git a/filament/src/components/RenderableManager.h b/filament/src/components/RenderableManager.h index 003026baa0d..70332c63bbf 100644 --- a/filament/src/components/RenderableManager.h +++ b/filament/src/components/RenderableManager.h @@ -20,24 +20,22 @@ #include "downcast.h" #include "HwRenderPrimitiveFactory.h" -#include "UniformBuffer.h" #include
#include +#include #include -#include - #include #include #include #include +#include #include #include #include -#include #include diff --git a/filament/src/details/Engine.cpp b/filament/src/details/Engine.cpp index 7a97aedd7e6..d57df19550a 100644 --- a/filament/src/details/Engine.cpp +++ b/filament/src/details/Engine.cpp @@ -581,6 +581,8 @@ void FEngine::flush() { } void FEngine::flushAndWait() { + ASSERT_PRECONDITION(!mCommandBufferQueue.isPaused(), + "Cannot call flushAndWait() when rendering thread is paused!"); #if defined(__ANDROID__) @@ -1218,6 +1220,10 @@ void FEngine::destroy(FEngine* engine) { } } +bool FEngine::isPaused() const noexcept { + return mCommandBufferQueue.isPaused(); +} + void FEngine::setPaused(bool paused) { mCommandBufferQueue.setPaused(paused); } diff --git a/filament/src/details/Engine.h b/filament/src/details/Engine.h index c31549e9da5..5d72ac9c2ee 100644 --- a/filament/src/details/Engine.h +++ b/filament/src/details/Engine.h @@ -246,6 +246,10 @@ class FEngine : public Engine { case Backend::VULKAN: return { backend::ShaderLanguage::SPIRV }; case Backend::METAL: + const auto& lang = mConfig.preferredShaderLanguage; + if (lang == Config::ShaderLanguage::MSL) { + return { backend::ShaderLanguage::MSL, backend::ShaderLanguage::METAL_LIBRARY }; + } return { backend::ShaderLanguage::METAL_LIBRARY, backend::ShaderLanguage::MSL }; } } @@ -344,6 +348,7 @@ class FEngine : public Engine { void destroy(utils::Entity e); + bool isPaused() const noexcept; void setPaused(bool paused); void flushAndWait(); diff --git a/filament/src/details/Renderer.cpp b/filament/src/details/Renderer.cpp index d9a20d79379..8fc05cf537d 100644 --- a/filament/src/details/Renderer.cpp +++ b/filament/src/details/Renderer.cpp @@ -298,7 +298,11 @@ bool FRenderer::beginFrame(FSwapChain* swapChain, uint64_t vsyncSteadyClockTimeN FEngine& engine = mEngine; FEngine::DriverApi& driver = engine.getDriverApi(); - driver.beginFrame(appVsync.time_since_epoch().count(), mFrameId); + 
driver.beginFrame( + appVsync.time_since_epoch().count(), + mDisplayInfo.refreshRate == 0.0 ? 0 : int64_t( + 1'000'000'000.0 / mDisplayInfo.refreshRate), + mFrameId); // This need to occur after the backend beginFrame() because some backends need to start // a command buffer before creating a fence. @@ -462,7 +466,11 @@ void FRenderer::renderStandaloneView(FView const* view) { engine.prepare(); FEngine::DriverApi& driver = engine.getDriverApi(); - driver.beginFrame(steady_clock::now().time_since_epoch().count(), mFrameId); + driver.beginFrame( + steady_clock::now().time_since_epoch().count(), + mDisplayInfo.refreshRate == 0.0 ? 0 : int64_t( + 1'000'000'000.0 / mDisplayInfo.refreshRate), + mFrameId); renderInternal(view); @@ -832,7 +840,8 @@ void FRenderer::renderJob(RootArenaScope& rootArenaScope, FView& view) { passBuilder.camera(cameraInfo); passBuilder.geometry(scene.getRenderableData(), - view.getVisibleRenderables(), scene.getRenderableUBO()); + view.getVisibleRenderables(), + view.getRenderableUBO()); // view set-ups that need to happen before rendering fg.addTrivialSideEffectPass("Prepare View Uniforms", diff --git a/filament/src/details/Scene.cpp b/filament/src/details/Scene.cpp index ff6af6293a5..01888e89da1 100644 --- a/filament/src/details/Scene.cpp +++ b/filament/src/details/Scene.cpp @@ -388,9 +388,6 @@ void FScene::updateUBOs( SYSTRACE_CALL(); FEngine::DriverApi& driver = mEngine.getDriverApi(); - // store the UBO handle - mRenderableViewUbh = renderableUbh; - // don't allocate more than 16 KiB directly into the render stream static constexpr size_t MAX_STREAM_ALLOCATION_COUNT = 64; // 16 KiB const size_t count = visibleRenderables.size(); @@ -450,8 +447,6 @@ void FScene::updateUBOs( } void FScene::terminate(FEngine&) { - // DO NOT destroy this UBO, it's owned by the View - mRenderableViewUbh.clear(); } void FScene::prepareDynamicLights(const CameraInfo& camera, diff --git a/filament/src/details/Scene.h b/filament/src/details/Scene.h index 
490d115af3c..9690e6386e2 100644 --- a/filament/src/details/Scene.h +++ b/filament/src/details/Scene.h @@ -80,10 +80,6 @@ class FScene : public Scene { void prepareDynamicLights(const CameraInfo& camera, backend::Handle lightUbh) noexcept; - backend::Handle getRenderableUBO() const noexcept { - return mRenderableViewUbh; - } - /* * Storage for per-frame renderable data */ @@ -228,7 +224,6 @@ class FScene : public Scene { */ RenderableSoa mRenderableData; LightSoa mLightData; - backend::Handle mRenderableViewUbh; // This is actually owned by the view. bool mHasContactShadows = false; // State shared between Scene and driver callbacks. diff --git a/filament/src/details/SwapChain.cpp b/filament/src/details/SwapChain.cpp index ef4eb4fabd4..0407d893763 100644 --- a/filament/src/details/SwapChain.cpp +++ b/filament/src/details/SwapChain.cpp @@ -69,13 +69,14 @@ void FSwapChain::terminate(FEngine& engine) noexcept { engine.getDriverApi().destroySwapChain(mHwSwapChain); } -void FSwapChain::setFrameScheduledCallback(FrameScheduledCallback callback, void* user) { - mFrameScheduledCallback = callback; - mEngine.getDriverApi().setFrameScheduledCallback(mHwSwapChain, callback, user); +void FSwapChain::setFrameScheduledCallback( + backend::CallbackHandler* handler, FrameScheduledCallback&& callback) { + mFrameScheduledCallbackIsSet = bool(callback); + mEngine.getDriverApi().setFrameScheduledCallback(mHwSwapChain, handler, std::move(callback)); } -SwapChain::FrameScheduledCallback FSwapChain::getFrameScheduledCallback() const noexcept { - return mFrameScheduledCallback; +bool FSwapChain::isFrameScheduledCallbackSet() const noexcept { + return mFrameScheduledCallbackIsSet; } void FSwapChain::setFrameCompletedCallback(backend::CallbackHandler* handler, diff --git a/filament/src/details/SwapChain.h b/filament/src/details/SwapChain.h index 7a97727e832..efe7483563e 100644 --- a/filament/src/details/SwapChain.h +++ b/filament/src/details/SwapChain.h @@ -78,9 +78,10 @@ class FSwapChain : 
public SwapChain { return mHwSwapChain; } - void setFrameScheduledCallback(FrameScheduledCallback callback, void* user); + void setFrameScheduledCallback( + backend::CallbackHandler* handler, FrameScheduledCallback&& callback); - FrameScheduledCallback getFrameScheduledCallback() const noexcept; + bool isFrameScheduledCallbackSet() const noexcept; void setFrameCompletedCallback(backend::CallbackHandler* handler, utils::Invocable&& callback) noexcept; @@ -96,7 +97,7 @@ class FSwapChain : public SwapChain { private: FEngine& mEngine; backend::Handle mHwSwapChain; - FrameScheduledCallback mFrameScheduledCallback{}; + bool mFrameScheduledCallbackIsSet = false; void* mNativeWindow{}; uint32_t mWidth{}; uint32_t mHeight{}; diff --git a/filament/src/details/VertexBuffer.cpp b/filament/src/details/VertexBuffer.cpp index d718bbe0a85..2edf62d8555 100644 --- a/filament/src/details/VertexBuffer.cpp +++ b/filament/src/details/VertexBuffer.cpp @@ -314,7 +314,7 @@ FVertexBuffer::FVertexBuffer(FEngine& engine, const VertexBuffer::Builder& build void FVertexBuffer::terminate(FEngine& engine) { FEngine::DriverApi& driver = engine.getDriverApi(); if (!mBufferObjectsEnabled) { - for (BufferObjectHandle const bo : mBufferObjects) { + for (BufferObjectHandle bo : mBufferObjects) { driver.destroyBufferObject(bo); } } diff --git a/filament/src/details/View.cpp b/filament/src/details/View.cpp index 9ce255332d1..19b50ce7762 100644 --- a/filament/src/details/View.cpp +++ b/filament/src/details/View.cpp @@ -649,7 +649,8 @@ void FView::prepare(FEngine& engine, DriverApi& driver, RootArenaScope& rootAren const size_t count = std::max(size_t(16u), (4u * merged.size() + 2u) / 3u); mRenderableUBOSize = uint32_t(count * sizeof(PerRenderableData)); driver.destroyBufferObject(mRenderableUbh); - mRenderableUbh = driver.createBufferObject(mRenderableUBOSize + sizeof(PerRenderableUib), + mRenderableUbh = driver.createBufferObject( + mRenderableUBOSize + sizeof(PerRenderableUib), 
BufferObjectBinding::UNIFORM, BufferUsage::DYNAMIC); } else { // TODO: should we shrink the underlying UBO at some point? diff --git a/filament/src/details/View.h b/filament/src/details/View.h index be15af9e520..c2fb5bb58b5 100644 --- a/filament/src/details/View.h +++ b/filament/src/details/View.h @@ -437,8 +437,11 @@ class FView : public View { return mFogEntity; } -private: + backend::Handle getRenderableUBO() const noexcept { + return mRenderableUbh; + } +private: struct FPickingQuery : public PickingQuery { private: FPickingQuery(uint32_t x, uint32_t y, diff --git a/filament/src/materials/blitDepth.mat b/filament/src/materials/blitDepth.mat new file mode 100644 index 00000000000..c8536ad545c --- /dev/null +++ b/filament/src/materials/blitDepth.mat @@ -0,0 +1,42 @@ +material { + name : blitDepth, + parameters : [ + { + type : sampler2d, + name : depth, + precision: medium + }, + { + type : float4, + name : viewport, + precision: high + } + ], + outputs : [ + { + name : depth, + target : depth, + type : float + } + ], + variables : [ + vertex + ], + culling: none, + depthWrite : true, + depthCulling : false, + domain: postprocess, +} + +vertex { + void postProcessVertex(inout PostProcessVertexInputs postProcess) { + postProcess.vertex.xy = materialParams.viewport.xy + postProcess.normalizedUV * materialParams.viewport.zw; + postProcess.vertex.xy = uvToRenderTargetUV(postProcess.vertex.xy); + } +} + +fragment { + void postProcess(inout PostProcessInputs postProcess) { + postProcess.depth = textureLod(materialParams_depth, variable_vertex.xy, 0.0).r; + } +} diff --git a/ios/CocoaPods/Filament.podspec b/ios/CocoaPods/Filament.podspec index 3c879e3037e..ec121f61e9e 100644 --- a/ios/CocoaPods/Filament.podspec +++ b/ios/CocoaPods/Filament.podspec @@ -1,12 +1,12 @@ Pod::Spec.new do |spec| spec.name = "Filament" - spec.version = "1.51.6" + spec.version = "1.51.7" spec.license = { :type => "Apache 2.0", :file => "LICENSE" } spec.homepage = 
"https://google.github.io/filament" spec.authors = "Google LLC." spec.summary = "Filament is a real-time physically based rendering engine for Android, iOS, Windows, Linux, macOS, and WASM/WebGL." spec.platform = :ios, "11.0" - spec.source = { :http => "https://github.com/google/filament/releases/download/v1.51.6/filament-v1.51.6-ios.tgz" } + spec.source = { :http => "https://github.com/google/filament/releases/download/v1.51.7/filament-v1.51.7-ios.tgz" } # Fix linking error with Xcode 12; we do not yet support the simulator on Apple silicon. spec.pod_target_xcconfig = { diff --git a/libs/filagui/CMakeLists.txt b/libs/filagui/CMakeLists.txt index 66058c4103c..55f3d867ff5 100644 --- a/libs/filagui/CMakeLists.txt +++ b/libs/filagui/CMakeLists.txt @@ -33,6 +33,11 @@ endif() set(MATERIAL_SRCS src/materials/uiBlit.mat) +if (ANDROID) + list(APPEND MATERIAL_SRCS + src/materials/uiBlitExternal.mat) +endif() + file(MAKE_DIRECTORY ${MATERIAL_DIR}) foreach (mat_src ${MATERIAL_SRCS}) diff --git a/libs/filagui/include/filagui/ImGuiHelper.h b/libs/filagui/include/filagui/ImGuiHelper.h index 9ad4f0765e1..3f55ac77bbd 100644 --- a/libs/filagui/include/filagui/ImGuiHelper.h +++ b/libs/filagui/include/filagui/ImGuiHelper.h @@ -84,12 +84,14 @@ class UTILS_PUBLIC ImGuiHelper { filament::View* mView; // The view is owned by the client. 
filament::Scene* mScene; filament::Material* mMaterial2d = nullptr; + std::vector mMaterial2dInstances; +#ifdef __ANDROID__ filament::Material* mMaterialExternal = nullptr; + std::vector mMaterialExternalInstances; +#endif filament::Camera* mCamera = nullptr; std::vector mVertexBuffers; std::vector mIndexBuffers; - std::vector mMaterial2dInstances; - std::vector mMaterialExternalInstances; utils::Entity mRenderable; utils::Entity mCameraEntity; filament::Texture* mTexture = nullptr; diff --git a/libs/filagui/src/ImGuiHelper.cpp b/libs/filagui/src/ImGuiHelper.cpp index 935ae138a4d..7d22547fdb9 100644 --- a/libs/filagui/src/ImGuiHelper.cpp +++ b/libs/filagui/src/ImGuiHelper.cpp @@ -62,12 +62,12 @@ ImGuiHelper::ImGuiHelper(Engine* engine, filament::View* view, const Path& fontP // Create a simple alpha-blended 2D blitting material. mMaterial2d = Material::Builder() .package(FILAGUI_RESOURCES_UIBLIT_DATA, FILAGUI_RESOURCES_UIBLIT_SIZE) - .constant("external", false) .build(*engine); +#ifdef __ANDROID__ mMaterialExternal = Material::Builder() - .package(FILAGUI_RESOURCES_UIBLIT_DATA, FILAGUI_RESOURCES_UIBLIT_SIZE) - .constant("external", true) + .package(FILAGUI_RESOURCES_UIBLITEXTERNAL_DATA, FILAGUI_RESOURCES_UIBLITEXTERNAL_SIZE) .build(*engine); +#endif // If the given font path is invalid, ImGui will silently fall back to proggy, which is a // tiny "pixel art" texture that is compiled into the library. @@ -79,7 +79,7 @@ ImGuiHelper::ImGuiHelper(Engine* engine, filament::View* view, const Path& fontP // For proggy, switch to NEAREST for pixel-perfect text. 
if (!fontPath.isFile() && !imGuiContext) { mSampler = TextureSampler(MinFilter::NEAREST, MagFilter::NEAREST); - mMaterial2d->setDefaultParameter("albedo2d", mTexture, mSampler); + mMaterial2d->setDefaultParameter("albedo", mTexture, mSampler); } utils::EntityManager& em = utils::EntityManager::get(); @@ -122,7 +122,7 @@ void ImGuiHelper::createAtlasTexture(Engine* engine) { mTexture->setImage(*engine, 0, std::move(pb)); mSampler = TextureSampler(MinFilter::LINEAR, MagFilter::LINEAR); - mMaterial2d->setDefaultParameter("albedo2d", mTexture, mSampler); + mMaterial2d->setDefaultParameter("albedo", mTexture, mSampler); } ImGuiHelper::~ImGuiHelper() { @@ -134,10 +134,12 @@ ImGuiHelper::~ImGuiHelper() { mEngine->destroy(mi); } mEngine->destroy(mMaterial2d); +#ifdef __ANDROID__ for (auto& mi : mMaterialExternalInstances) { mEngine->destroy(mi); } mEngine->destroy(mMaterialExternal); +#endif mEngine->destroy(mTexture); for (auto& vb : mVertexBuffers) { mEngine->destroy(vb); @@ -227,19 +229,19 @@ void ImGuiHelper::processImGuiCommands(ImDrawData* commands, const ImGuiIO& io) pcmd.UserCallback(cmds, &pcmd); } else { auto texture = (Texture const*)pcmd.TextureId; - const char* uniformName; MaterialInstance* materialInstance; +#ifdef __ANDROID__ if (texture && texture->getTarget() == Texture::Sampler::SAMPLER_EXTERNAL) { if (materialExternalIndex == mMaterialExternalInstances.size()) { mMaterialExternalInstances.push_back(mMaterialExternal->createInstance()); } - uniformName = "albedoExternal"; materialInstance = mMaterialExternalInstances[materialExternalIndex++]; - } else { + } else +#endif + { if (material2dIndex == mMaterial2dInstances.size()) { mMaterial2dInstances.push_back(mMaterial2d->createInstance()); } - uniformName = "albedo2d"; materialInstance = mMaterial2dInstances[material2dIndex++]; } materialInstance->setScissor( @@ -249,9 +251,9 @@ void ImGuiHelper::processImGuiCommands(ImDrawData* commands, const ImGuiIO& io) (uint16_t) (pcmd.ClipRect.w - pcmd.ClipRect.y)); 
if (texture) { TextureSampler sampler(MinFilter::LINEAR, MagFilter::LINEAR); - materialInstance->setParameter(uniformName, texture, sampler); + materialInstance->setParameter("albedo", texture, sampler); } else { - materialInstance->setParameter(uniformName, mTexture, mSampler); + materialInstance->setParameter("albedo", mTexture, mSampler); } rbuilder .geometry(primIndex, RenderableManager::PrimitiveType::TRIANGLES, diff --git a/libs/filagui/src/materials/uiBlit.mat b/libs/filagui/src/materials/uiBlit.mat index 4f9e85c5438..774113fbd33 100644 --- a/libs/filagui/src/materials/uiBlit.mat +++ b/libs/filagui/src/materials/uiBlit.mat @@ -1,19 +1,9 @@ material { name : uiBlit, - constants : [ - { - name : external, - type : bool - } - ], parameters : [ - { - type : samplerExternal, - name : albedoExternal - }, { type : sampler2d, - name : albedo2d + name : albedo } ], requires : [ @@ -32,9 +22,7 @@ fragment { prepareMaterial(material); vec2 uv = getUV0(); uv.y = 1.0 - uv.y; - vec4 albedo = materialConstants_external - ? 
texture2D(materialParams_albedoExternal, uv) - : texture2D(materialParams_albedo2d, uv); + vec4 albedo = texture2D(materialParams_albedo, uv); material.baseColor = getColor() * albedo; material.baseColor.rgb *= material.baseColor.a; } diff --git a/libs/filagui/src/materials/uiBlitExternal.mat b/libs/filagui/src/materials/uiBlitExternal.mat new file mode 100644 index 00000000000..f07187837de --- /dev/null +++ b/libs/filagui/src/materials/uiBlitExternal.mat @@ -0,0 +1,29 @@ +material { + name : uiBlitExternal, + parameters : [ + { + type : samplerExternal, + name : albedo + } + ], + requires : [ + uv0, + color + ], + shadingModel : unlit, + culling : none, + depthCulling: false, + blending : transparent, + featureLevel : 0 +} + +fragment { + void material(inout MaterialInputs material) { + prepareMaterial(material); + vec2 uv = getUV0(); + uv.y = 1.0 - uv.y; + vec4 albedo = texture2D(materialParams_albedo, uv); + material.baseColor = getColor() * albedo; + material.baseColor.rgb *= material.baseColor.a; + } +} diff --git a/libs/filamat/src/GLSLPostProcessor.cpp b/libs/filamat/src/GLSLPostProcessor.cpp index ce8db716dfc..6765b122f51 100644 --- a/libs/filamat/src/GLSLPostProcessor.cpp +++ b/libs/filamat/src/GLSLPostProcessor.cpp @@ -604,7 +604,13 @@ std::shared_ptr GLSLPostProcessor::createOptimizer( }); if (optimization == MaterialBuilder::Optimization::SIZE) { - registerSizePasses(*optimizer, config); + // When optimizing for size, we don't run the SPIR-V through any size optimization passes + // when targeting MSL. This results in better line dictionary compression. We do, however, + // still register the passes necessary (below) to support half precision floating point + // math. 
+ if (config.targetApi != MaterialBuilder::TargetApi::METAL) { + registerSizePasses(*optimizer, config); + } } else if (optimization == MaterialBuilder::Optimization::PERFORMANCE) { registerPerformancePasses(*optimizer, config); } @@ -719,7 +725,6 @@ void GLSLPostProcessor::registerSizePasses(Optimizer& optimizer, Config const& c RegisterPass(CreateWrapOpKillPass()); RegisterPass(CreateDeadBranchElimPass()); - RegisterPass(CreateMergeReturnPass(), MaterialBuilder::TargetApi::METAL); RegisterPass(CreateInlineExhaustivePass()); RegisterPass(CreateEliminateDeadFunctionsPass()); RegisterPass(CreatePrivateToLocalPass()); @@ -728,11 +733,9 @@ void GLSLPostProcessor::registerSizePasses(Optimizer& optimizer, Config const& c RegisterPass(CreateCCPPass()); RegisterPass(CreateLoopUnrollPass(true)); RegisterPass(CreateDeadBranchElimPass()); - RegisterPass(CreateSimplificationPass(), MaterialBuilder::TargetApi::METAL); RegisterPass(CreateScalarReplacementPass(0)); RegisterPass(CreateLocalSingleStoreElimPass()); RegisterPass(CreateIfConversionPass()); - RegisterPass(CreateSimplificationPass(), MaterialBuilder::TargetApi::METAL); RegisterPass(CreateAggressiveDCEPass()); RegisterPass(CreateDeadBranchElimPass()); RegisterPass(CreateBlockMergePass()); @@ -748,7 +751,6 @@ void GLSLPostProcessor::registerSizePasses(Optimizer& optimizer, Config const& c RegisterPass(CreateBlockMergePass()); RegisterPass(CreateLocalMultiStoreElimPass()); RegisterPass(CreateRedundancyEliminationPass()); - RegisterPass(CreateSimplificationPass(), MaterialBuilder::TargetApi::METAL); RegisterPass(CreateAggressiveDCEPass()); RegisterPass(CreateCFGCleanupPass()); } diff --git a/libs/gltfio/include/gltfio/AssetLoader.h b/libs/gltfio/include/gltfio/AssetLoader.h index f516166a800..bf650f69477 100644 --- a/libs/gltfio/include/gltfio/AssetLoader.h +++ b/libs/gltfio/include/gltfio/AssetLoader.h @@ -38,6 +38,23 @@ namespace filament::gltfio { class NodeManager; +// Use this struct to enable mikktspace-based 
tangent-space computation. +/** + * \struct AssetConfigurationExtended AssetLoader.h gltfio/AssetLoader.h + * \brief extends struct AssetConfiguration + * Useful if client needs mikktspace tangent space computation. + * NOTE: Android, iOS, Web are not supported. And only disk-local glTF resources are supported. + */ +struct AssetConfigurationExtended { + //! Optional The same parameter as provided to \struct ResourceConfiguration ResourceLoader.h + //! gltfio/ResourceLoader.h + char const* gltfPath; + + //! Client can use this method to check if the extended implementation is supported on their + //! platform or not. + static bool isSupported(); +}; + /** * \struct AssetConfiguration AssetLoader.h gltfio/AssetLoader.h * \brief Construction parameters for AssetLoader. @@ -62,6 +79,10 @@ struct AssetConfiguration { //! Optional default node name for anonymous nodes char* defaultNodeName = nullptr; + + //! Optional to enable mikktspace tangents. Lifetime of struct only needs to be maintained for + // the duration of the constructor of AssetLoader. + AssetConfigurationExtended* ext = nullptr; }; /** diff --git a/libs/gltfio/src/AssetLoader.cpp b/libs/gltfio/src/AssetLoader.cpp index fa73eb27417..61a2c6ed589 100644 --- a/libs/gltfio/src/AssetLoader.cpp +++ b/libs/gltfio/src/AssetLoader.cpp @@ -24,6 +24,7 @@ #include "FTrsTransformManager.h" #include "GltfEnums.h" #include "Utility.h" +#include "extended/AssetLoaderExtended.h" #include #include @@ -57,6 +58,8 @@ #include "downcast.h" +#include + using namespace filament; using namespace filament::math; using namespace utils; @@ -202,14 +205,21 @@ class MaterialInstanceCache { }; struct FAssetLoader : public AssetLoader { - FAssetLoader(const AssetConfiguration& config) : + FAssetLoader(AssetConfiguration const& config) : mEntityManager(config.entities ? 
*config.entities : EntityManager::get()), mRenderableManager(config.engine->getRenderableManager()), mNameManager(config.names), mTransformManager(config.engine->getTransformManager()), mMaterials(*config.materials), mEngine(*config.engine), - mDefaultNodeName(config.defaultNodeName) {} + mDefaultNodeName(config.defaultNodeName) { + if (config.ext) { + ASSERT_PRECONDITION(AssetConfigurationExtended::isSupported(), + "Extend asset loading is not supported on this platform"); + mLoaderExtended = std::make_unique( + *config.ext, config.engine, mMaterials); + } + } FFilamentAsset* createAsset(const uint8_t* bytes, uint32_t nbytes); FFilamentAsset* createInstancedAsset(const uint8_t* bytes, uint32_t numBytes, @@ -292,6 +302,9 @@ struct FAssetLoader : public AssetLoader { // Weak reference to the largest dummy buffer so far in the current loading phase. BufferObject* mDummyBufferObject = nullptr; + +public: + std::unique_ptr mLoaderExtended; }; FILAMENT_DOWNCAST(AssetLoader) @@ -422,7 +435,7 @@ FFilamentAsset* FAssetLoader::createRootAsset(const cgltf_data* srcAsset) { mDummyBufferObject = nullptr; FFilamentAsset* fAsset = new FFilamentAsset(&mEngine, mNameManager, &mEntityManager, - &mNodeManager, &mTrsTransformManager, srcAsset); + &mNodeManager, &mTrsTransformManager, srcAsset, (bool) mLoaderExtended); // It is not an error for a glTF file to have zero scenes. fAsset->mScenes.clear(); @@ -620,12 +633,41 @@ void FAssetLoader::createPrimitives(const cgltf_node* node, const char* name, for (cgltf_size index = 0, n = mesh->primitives_count; index < n; ++index) { Primitive& outputPrim = prims[index]; - const cgltf_primitive& inputPrim = mesh->primitives[index]; - - // Create a Filament VertexBuffer and IndexBuffer for this prim if we haven't already. 
- if (!outputPrim.vertices && !createPrimitive(inputPrim, name, &outputPrim, fAsset)) { - mError = true; - return; + cgltf_primitive& inputPrim = mesh->primitives[index]; + + if (!outputPrim.vertices) { + if (mLoaderExtended) { + auto& resourceInfo = std::get(fAsset->mResourceInfo); + resourceInfo.uriDataCache = mLoaderExtended->getUriDataCache(); + AssetLoaderExtended::Input input{ + .gltf = gltf, + .prim = &inputPrim, + .name = name, + .dracoCache = &fAsset->mSourceAsset->dracoCache, + .material = getMaterial(gltf, inputPrim.material, &outputPrim.uvmap, + utility::primitiveHasVertexColor(&inputPrim)), + }; + + mError = !mLoaderExtended->createPrimitive(&input, &outputPrim, resourceInfo.slots); + if (!mError) { + if (outputPrim.vertices) { + fAsset->mVertexBuffers.push_back(outputPrim.vertices); + } + if (outputPrim.indices) { + fAsset->mIndexBuffers.push_back(outputPrim.indices); + } + if (outputPrim.targets) { + fAsset->mMorphTargetBuffers.push_back(outputPrim.targets); + } + } + } else { + // Create a Filament VertexBuffer and IndexBuffer for this prim if we haven't + // already. + mError = !createPrimitive(inputPrim, name, &outputPrim, fAsset); + } + if (mError) { + return; + } } // Expand the object-space bounding box. @@ -777,6 +819,8 @@ void FAssetLoader::createMaterialVariants(const cgltf_mesh* mesh, Entity entity, bool FAssetLoader::createPrimitive(const cgltf_primitive& inPrim, const char* name, Primitive* outPrim, FFilamentAsset* fAsset) { + using BufferSlot = FFilamentAsset::ResourceInfo::BufferSlot; + Material* material = getMaterial(fAsset->mSourceAsset->hierarchy, inPrim.material, &outPrim->uvmap, primitiveHasVertexColor(inPrim)); AttributeBitset requiredAttributes = material->getRequiredAttributes(); @@ -787,8 +831,8 @@ bool FAssetLoader::createPrimitive(const cgltf_primitive& inPrim, const char* na // request from Google. // Create a little lambda that appends to the asset's vertex buffer slots. 
- auto slots = &fAsset->mBufferSlots; - auto addBufferSlot = [slots](BufferSlot entry) { + auto slots = &std::get(fAsset->mResourceInfo).mBufferSlots; + auto addBufferSlot = [slots](FFilamentAsset::ResourceInfo::BufferSlot entry) { slots->push_back(entry); }; @@ -807,7 +851,7 @@ bool FAssetLoader::createPrimitive(const cgltf_primitive& inPrim, const char* na .bufferType(indexType) .build(mEngine); - BufferSlot slot = { accessor }; + FFilamentAsset::ResourceInfo::BufferSlot slot = { accessor }; slot.indexBuffer = indices; addBufferSlot(slot); } else if (inPrim.attributes_count > 0) { @@ -1049,7 +1093,8 @@ bool FAssetLoader::createPrimitive(const cgltf_primitive& inPrim, const char* na outPrim->indices = indices; outPrim->vertices = vertices; - fAsset->mPrimitives.push_back({&inPrim, vertices}); + auto& primitives = std::get(fAsset->mResourceInfo).mPrimitives; + primitives.push_back({&inPrim, vertices}); fAsset->mVertexBuffers.push_back(vertices); for (size_t i = firstSlot; i < slots->size(); ++i) { @@ -1549,6 +1594,14 @@ void FAssetLoader::importSkins(FFilamentInstance* instance, const cgltf_data* gl } } +bool AssetConfigurationExtended::isSupported() { +#if defined(__ANDROID__) || defined(IOS) || defined(__EMSCRIPTEN__) + return false; +#else + return true; +#endif +} + AssetLoader* AssetLoader::create(const AssetConfiguration& config) { return new FAssetLoader(config); } diff --git a/libs/gltfio/src/FFilamentAsset.h b/libs/gltfio/src/FFilamentAsset.h index a29c016d24e..55cdad2c926 100644 --- a/libs/gltfio/src/FFilamentAsset.h +++ b/libs/gltfio/src/FFilamentAsset.h @@ -49,6 +49,7 @@ #include +#include #include #ifdef NDEBUG @@ -74,16 +75,6 @@ namespace filament::gltfio { struct Wireframe; -// Encapsulates VertexBuffer::setBufferAt() or IndexBuffer::setBuffer(). 
-struct BufferSlot { - const cgltf_accessor* accessor; - cgltf_attribute_type attribute; - int bufferIndex; // for vertex buffer and morph target buffer only - VertexBuffer* vertexBuffer; - IndexBuffer* indexBuffer; - MorphTargetBuffer* morphTargetBuffer; -}; - // Stores a connection between Texture and MaterialInstance; consumed by resource loader so that it // can call "setParameter" on the given MaterialInstance after the Texture has been created. // Since material instances are not typically shared between FilamentInstance, the slots are a @@ -110,14 +101,24 @@ struct Primitive { using MeshCache = utils::FixedCapacityVector>; struct FFilamentAsset : public FilamentAsset { + struct ResourceInfo; + struct ResourceInfoExtended; + FFilamentAsset(Engine* engine, utils::NameComponentManager* names, utils::EntityManager* entityManager, NodeManager* nodeManager, - TrsTransformManager* trsTransformManager, const cgltf_data* srcAsset) : + TrsTransformManager* trsTransformManager, const cgltf_data* srcAsset, + bool useExtendedAlgo) : mEngine(engine), mNameManager(names), mEntityManager(entityManager), mNodeManager(nodeManager), mTrsTransformManager(trsTransformManager), mSourceAsset(new SourceAsset {(cgltf_data*)srcAsset}), mTextures(srcAsset->textures_count), - mMeshCache(srcAsset->meshes_count) {} + mMeshCache(srcAsset->meshes_count) { + if (!useExtendedAlgo) { + mResourceInfo = ResourceInfo{}; + } else { + mResourceInfo = ResourceInfoExtended{}; + } + } ~FFilamentAsset(); @@ -228,6 +229,10 @@ struct FFilamentAsset : public FilamentAsset { mDetachedFilamentComponents = true; } + bool isUsingExtendedAlgorithm() { + return std::holds_alternative(mResourceInfo); + } + // end public API // If a Filament Texture for the given args already exists, calls setParameter() and returns @@ -315,8 +320,50 @@ struct FFilamentAsset : public FilamentAsset { MeshCache mMeshCache; // Asset information that is produced by AssetLoader and consumed by ResourceLoader: - std::vector 
mBufferSlots; - std::vector > mPrimitives; + struct ResourceInfo { + // Encapsulates VertexBuffer::setBufferAt() or IndexBuffer::setBuffer(). + struct BufferSlot { + const cgltf_accessor* accessor; + cgltf_attribute_type attribute; + int bufferIndex;// for vertex buffer and morph target buffer only + VertexBuffer* vertexBuffer; + IndexBuffer* indexBuffer; + MorphTargetBuffer* morphTargetBuffer; + }; + + std::vector mBufferSlots; + std::vector> mPrimitives; + }; + struct ResourceInfoExtended { + // Used to denote a generated buffer. Set as `index in `CgltfAttribute`. + static constexpr int const GENERATED_0_INDEX = -1; + static constexpr int const GENERATED_1_INDEX = -2; + + struct BufferSlot { + VertexBuffer* vertices = nullptr; + IndexBuffer* indices = nullptr; + MorphTargetBuffer* target = nullptr; + int slot = -1; + size_t sizeInBytes = 0; + + void* data = nullptr; + + // MorphTarget-only data; + struct { + short4* tbn = nullptr; + float3* positions = nullptr; + } targetData; + }; + + std::vector slots; + + // This is to workaround the fact that the original ResourceLoader owns the UriDataCache. In + // the extended implementation, we create it in AssetLoader. We pass it along to + // ResourceLoader here. + UriDataCacheHandle uriDataCache; + }; + + std::variant mResourceInfo; }; FILAMENT_DOWNCAST(FilamentAsset) diff --git a/libs/gltfio/src/ResourceLoader.cpp b/libs/gltfio/src/ResourceLoader.cpp index 135cca7c0e9..9b077e94357 100644 --- a/libs/gltfio/src/ResourceLoader.cpp +++ b/libs/gltfio/src/ResourceLoader.cpp @@ -22,6 +22,7 @@ #include "TangentsJob.h" #include "downcast.h" #include "Utility.h" +#include "extended/ResourceLoaderExtended.h" #include #include @@ -229,8 +230,8 @@ inline void createSkins(cgltf_data const* gltf, bool normalize, inline void uploadBuffers(FFilamentAsset* asset, Engine& engine, UriDataCacheHandle uriDataCache) { // Upload VertexBuffer and IndexBuffer data to the GPU. 
- auto& slots = asset->mBufferSlots; - for (auto slot: slots) { + auto& slots = std::get(asset->mResourceInfo).mBufferSlots; + for (auto const& slot: slots) { const cgltf_accessor* accessor = slot.accessor; if (!accessor->buffer_view) { continue; @@ -390,6 +391,11 @@ bool ResourceLoader::loadResources(FilamentAsset* asset) { // This is a workaround in case of using extended algo, please see description in // FFilamentAsset.h + if (fasset->isUsingExtendedAlgorithm()) { + pImpl->mUriDataCache = + std::get(fasset->mResourceInfo).uriDataCache; + } + return loadResources(fasset, false); } @@ -402,6 +408,8 @@ bool ResourceLoader::loadResources(FFilamentAsset* asset, bool async) { } asset->mResourcesLoaded = true; + bool const isExtendedAlgo = asset->isUsingExtendedAlgorithm(); + // At this point, any entities that are created in the future (i.e. dynamically added instances) // will not need the progressive feature to be enabled. This simplifies the dependency graph and // prevents it from growing. @@ -414,30 +422,35 @@ bool ResourceLoader::loadResources(FFilamentAsset* asset, bool async) { cgltf_data const* gltf = asset->mSourceAsset->hierarchy; - utility::loadCgltfBuffers(gltf, pImpl->mGltfPath.c_str(), pImpl->mUriDataCache); + if (!isExtendedAlgo) { + utility::loadCgltfBuffers(gltf, pImpl->mGltfPath.c_str(), pImpl->mUriDataCache); - // Decompress Draco meshes early on, which allows us to exploit subsequent processing such - // as tangent generation. - DracoCache* dracoCache = &asset->mSourceAsset->dracoCache; - auto& primitives = asset->mPrimitives; - // Go through every primitive and check if it has a Draco mesh. - for (auto& [prim, vertexBuffer]: primitives) { - if (!prim->has_draco_mesh_compression) { - continue; + // Decompress Draco meshes early on, which allows us to exploit subsequent processing such + // as tangent generation. 
+ DracoCache* dracoCache = &asset->mSourceAsset->dracoCache; + auto& primitives = std::get(asset->mResourceInfo).mPrimitives; + // Go through every primitive and check if it has a Draco mesh. + for (auto& [prim, vertexBuffer]: primitives) { + if (!prim->has_draco_mesh_compression) { + continue; + } + utility::decodeDracoMeshes(gltf, prim, dracoCache); } - utility::decodeDracoMeshes(gltf, prim, dracoCache); - } - utility::decodeMeshoptCompression((cgltf_data*) gltf); + utility::decodeMeshoptCompression((cgltf_data*) gltf); - uploadBuffers(asset, *pImpl->mEngine, pImpl->mUriDataCache); + uploadBuffers(asset, *pImpl->mEngine, pImpl->mUriDataCache); - // Compute surface orientation quaternions if necessary. This is similar to sparse data in - // that we need to generate the contents of a GPU buffer by processing one or more CPU - // buffer(s). - pImpl->computeTangents(asset); + // Compute surface orientation quaternions if necessary. This is similar to sparse data in + // that we need to generate the contents of a GPU buffer by processing one or more CPU + // buffer(s). + pImpl->computeTangents(asset); - asset->mBufferSlots.clear(); - asset->mPrimitives.clear(); + std::get(asset->mResourceInfo).mBufferSlots.clear(); + std::get(asset->mResourceInfo).mPrimitives.clear(); + } else { + auto& slots = std::get(asset->mResourceInfo).slots; + ResourceLoaderExtended::loadResources(slots, pImpl->mEngine, asset->mBufferObjects); + } createSkins(gltf, pImpl->mNormalizeSkinningWeights, asset->mSkins); @@ -663,7 +676,9 @@ void ResourceLoader::Impl::computeTangents(FFilamentAsset* asset) { // Collect all TANGENT vertex attribute slots that need to be populated. 
tsl::robin_map baseTangents; - for (auto slot : asset->mBufferSlots) { + auto& slots = std::get(asset->mResourceInfo).mBufferSlots; + auto& primitives = std::get(asset->mResourceInfo).mPrimitives; + for (auto const& slot: slots) { if (slot.accessor != kGenerateTangents && slot.accessor != kGenerateNormals) { continue; } @@ -673,7 +688,7 @@ void ResourceLoader::Impl::computeTangents(FFilamentAsset* asset) { // Create a job description for each triangle-based primitive. using Params = TangentsJob::Params; std::vector jobParams; - for (auto [prim, vb] : asset->mPrimitives) { + for (auto const& [prim, vb] : primitives) { if (UTILS_UNLIKELY(prim->type != cgltf_primitive_type_triangles)) { continue; } diff --git a/libs/gltfio/src/extended/AssetLoaderExtended.cpp b/libs/gltfio/src/extended/AssetLoaderExtended.cpp index 08cf85fd9d1..7261abfbf02 100644 --- a/libs/gltfio/src/extended/AssetLoaderExtended.cpp +++ b/libs/gltfio/src/extended/AssetLoaderExtended.cpp @@ -41,9 +41,8 @@ constexpr uint8_t const VERTEX_JOB = 0x1; constexpr uint8_t const INDEX_JOB = 0x2; constexpr uint8_t const MORPH_TARGET_JOB = 0x4; -// TODO: will be updated on proper integration -constexpr int const GENERATED_0 = TmpBufferSlot::GENERATED_0_INDEX; -constexpr int const GENERATED_1 = TmpBufferSlot::GENERATED_1_INDEX; +constexpr int const GENERATED_0 = FFilamentAsset::ResourceInfoExtended::GENERATED_0_INDEX; +constexpr int const GENERATED_1 = FFilamentAsset::ResourceInfoExtended::GENERATED_1_INDEX; using BufferSlot = AssetLoaderExtended::BufferSlot; using BufferType = std::variant; diff --git a/libs/gltfio/src/extended/AssetLoaderExtended.h b/libs/gltfio/src/extended/AssetLoaderExtended.h index 0d9e3cce0e4..7af8054409c 100644 --- a/libs/gltfio/src/extended/AssetLoaderExtended.h +++ b/libs/gltfio/src/extended/AssetLoaderExtended.h @@ -47,33 +47,6 @@ struct FilamentAttribute { int slot; }; -// This will be removed when the extended classes are properly integrated. 
-struct TmpBufferSlot { - // Used to denote a generated buffer. Set as `index in `CgltfAttribute`. - static constexpr int const GENERATED_0_INDEX = -1; - static constexpr int const GENERATED_1_INDEX = -2; - - VertexBuffer* vertices = nullptr; - IndexBuffer* indices = nullptr; - MorphTargetBuffer* target = nullptr; - int slot = -1; - size_t sizeInBytes = 0; - - void* data = nullptr; - - // MorphTarget-only data; - struct { - short4* tbn = nullptr; - float3* positions = nullptr; - } targetData; -}; - -// This will be removed when the extended classes are properly integrated. -struct TmpAssetConfigurationExtended { - //! gltfio/ResourceLoader.h - char const* gltfPath; -}; - // AssetLoaderExtended performs the same task as AssetLoader. Specifically, it takes the data from // cgltf and store them in CPU memory. These buffers are then forwarded to ResourceLoader for // uplaoding to GPU. The difference between this class AssetLoader is that tangent space computation @@ -81,8 +54,7 @@ struct TmpAssetConfigurationExtended { // TangentSpaceMesh will remesh the input and possibly change the indices, vertex count, and // triangle counts, and so those changes must be resolved before the buffers are sent to the GPU. 
struct AssetLoaderExtended { - using BufferSlot = TmpBufferSlot; - using AssetConfigurationExtended = TmpAssetConfigurationExtended; + using BufferSlot = FFilamentAsset::ResourceInfoExtended::BufferSlot; using Output = Primitive; struct Input { diff --git a/libs/gltfio/src/extended/ResourceLoaderExtended.h b/libs/gltfio/src/extended/ResourceLoaderExtended.h index 13fba7354f8..c095a046f52 100644 --- a/libs/gltfio/src/extended/ResourceLoaderExtended.h +++ b/libs/gltfio/src/extended/ResourceLoaderExtended.h @@ -17,8 +17,6 @@ #ifndef GLTFIO_RESOURCELOADEREXTENDED_H #define GLTFIO_RESOURCELOADEREXTENDED_H - -#include "AssetLoaderExtended.h" #include "../FFilamentAsset.h" #include @@ -26,7 +24,7 @@ namespace filament::gltfio { struct ResourceLoaderExtended { - using BufferSlot = AssetLoaderExtended::BufferSlot; + using BufferSlot = FFilamentAsset::ResourceInfoExtended::BufferSlot; static void loadResources( std::vector const& slots, filament::Engine* engine, std::vector& bufferObjects); diff --git a/libs/gltfio/src/extended/TangentSpaceMeshWrapper.cpp b/libs/gltfio/src/extended/TangentSpaceMeshWrapper.cpp index 365bd75260a..33ab6f04dbe 100644 --- a/libs/gltfio/src/extended/TangentSpaceMeshWrapper.cpp +++ b/libs/gltfio/src/extended/TangentSpaceMeshWrapper.cpp @@ -28,6 +28,9 @@ namespace { using AuxType = TangentSpaceMeshWrapper::AuxType; using Builder = TangentSpaceMeshWrapper::Builder; +template +using is_supported_aux_t = TangentSpaceMeshWrapper::is_supported_aux_t; + struct Passthrough { static constexpr int POSITION = 256; static constexpr int UV0 = 257; @@ -184,18 +187,11 @@ struct TangentSpaceMeshWrapper::Impl { return data; } - template - using is_supported_aux_t = - typename std::enable_if::value || - std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value>::type; template> T getAux(AuxType attribute) noexcept { size_t const nbytes = getVertexCount() * sizeof(std::remove_pointer_t); auto data = (T) malloc(nbytes); - 
DO_MESH_IMPL(getAux, data); + DO_MESH_IMPL(getAux, attribute, data); return data; } @@ -234,7 +230,7 @@ struct TangentSpaceMeshWrapper::Builder::Impl { void triangles(uint3 const* triangles) noexcept { DO_BUILDER_IMPL(triangles, triangles); } void triangleCount(size_t count) noexcept { DO_BUILDER_IMPL(triangleCount, count); } - template + template> void aux(AuxType type, T data) { DO_BUILDER_IMPL(aux, type, data); } @@ -295,7 +291,13 @@ Builder& Builder::triangles(uint3 const* triangles) noexcept { return *this; } -template +template Builder& Builder::aux(AuxType attribute, float2* data); +template Builder& Builder::aux(AuxType attribute, float3* data); +template Builder& Builder::aux(AuxType attribute, float4* data); +template Builder& Builder::aux(AuxType attribute, ushort3* data); +template Builder& Builder::aux(AuxType attribute, ushort4* data); + +template Builder& Builder::aux(AuxType type, T data) { mImpl->aux(type, data); return *this; @@ -318,6 +320,12 @@ short4* TangentSpaceMeshWrapper::getQuats() noexcept { return mImpl->getQuats(); uint3* TangentSpaceMeshWrapper::getTriangles() { return mImpl->getTriangles(); } size_t TangentSpaceMeshWrapper::getVertexCount() const noexcept { return mImpl->getVertexCount(); } +template float2* TangentSpaceMeshWrapper::getAux(AuxType attribute) noexcept; +template float3* TangentSpaceMeshWrapper::getAux(AuxType attribute) noexcept; +template float4* TangentSpaceMeshWrapper::getAux(AuxType attribute) noexcept; +template ushort3* TangentSpaceMeshWrapper::getAux(AuxType attribute) noexcept; +template ushort4* TangentSpaceMeshWrapper::getAux(AuxType attribute) noexcept; + template T TangentSpaceMeshWrapper::getAux(AuxType attribute) noexcept { return mImpl->getAux(attribute); diff --git a/libs/gltfio/src/extended/TangentSpaceMeshWrapper.h b/libs/gltfio/src/extended/TangentSpaceMeshWrapper.h index 6aab1574726..2dc2d5e8ddb 100644 --- a/libs/gltfio/src/extended/TangentSpaceMeshWrapper.h +++ 
b/libs/gltfio/src/extended/TangentSpaceMeshWrapper.h @@ -30,6 +30,12 @@ using namespace math; struct TangentSpaceMeshWrapper { using AuxType = geometry::TangentSpaceMesh::AuxAttribute; + template + using is_supported_aux_t = typename std::enable_if< + std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value || + std::is_same::value>::type; + struct Builder { struct Impl; @@ -42,8 +48,10 @@ struct TangentSpaceMeshWrapper { Builder& positions(float3 const* positions) noexcept; Builder& triangleCount(size_t triangleCount) noexcept; Builder& triangles(uint3 const* triangles) noexcept; - template + + template> Builder& aux(AuxType type, T data); + TangentSpaceMeshWrapper* build(); private: @@ -51,7 +59,7 @@ struct TangentSpaceMeshWrapper { }; explicit TangentSpaceMeshWrapper() = default; - + static void destroy(TangentSpaceMeshWrapper* mesh); float3* getPositions() noexcept; diff --git a/libs/utils/CMakeLists.txt b/libs/utils/CMakeLists.txt index e19a943c185..928e2f37755 100644 --- a/libs/utils/CMakeLists.txt +++ b/libs/utils/CMakeLists.txt @@ -80,6 +80,7 @@ if (WIN32) endif() if (ANDROID) list(APPEND SRCS src/android/ThermalManager.cpp) + list(APPEND SRCS src/android/PerformanceHintManager.cpp) list(APPEND SRCS src/android/Systrace.cpp) endif() if (LINUX OR ANDROID) diff --git a/libs/utils/include/utils/StructureOfArrays.h b/libs/utils/include/utils/StructureOfArrays.h index a430958470c..c0b2315eccf 100644 --- a/libs/utils/include/utils/StructureOfArrays.h +++ b/libs/utils/include/utils/StructureOfArrays.h @@ -368,7 +368,7 @@ class StructureOfArraysBase { size_t last = mSize++; // Fold expression on the comma operator ([&]{ - new(std::get(mArrays) + last) Elements{std::get(args)}; + new(std::get(mArrays) + last) Elements{std::get(std::forward(args))}; }() , ...); } @@ -513,7 +513,7 @@ class StructureOfArraysBase { return (soa.elementAt(i) = other); } UTILS_ALWAYS_INLINE Type const& operator = (Type&& other) noexcept { - return 
(soa.elementAt(i) = other); + return (soa.elementAt(i) = std::forward(other)); } // comparisons UTILS_ALWAYS_INLINE bool operator==(Type const& other) const { diff --git a/libs/utils/include/utils/algorithm.h b/libs/utils/include/utils/algorithm.h index ea5ca44fd5c..7a747b84ceb 100644 --- a/libs/utils/include/utils/algorithm.h +++ b/libs/utils/include/utils/algorithm.h @@ -22,6 +22,7 @@ #include // for std::enable_if #include +#include #include namespace utils { @@ -43,9 +44,15 @@ constexpr inline T clz(T x) noexcept { static_assert(sizeof(T) * CHAR_BIT <= 128, "details::clz() only support up to 128 bits"); x |= (x >> 1u); x |= (x >> 2u); - x |= (x >> 4u); - x |= (x >> 8u); - x |= (x >> 16u); + if constexpr (sizeof(T) * CHAR_BIT >= 8) { // just to silence compiler warning + x |= (x >> 4u); + } + if constexpr (sizeof(T) * CHAR_BIT >= 16) { // just to silence compiler warning + x |= (x >> 8u); + } + if constexpr (sizeof(T) * CHAR_BIT >= 32) { // just to silence compiler warning + x |= (x >> 16u); + } if constexpr (sizeof(T) * CHAR_BIT >= 64) { // just to silence compiler warning x |= (x >> 32u); } @@ -67,11 +74,15 @@ constexpr inline T ctz(T x) noexcept { x &= -x; #endif if (x) c--; - if (sizeof(T) * CHAR_BIT >= 64) { + if constexpr (sizeof(T) * CHAR_BIT >= 64) { if (x & T(0x00000000FFFFFFFF)) c -= 32; } - if (x & T(0x0000FFFF0000FFFF)) c -= 16; - if (x & T(0x00FF00FF00FF00FF)) c -= 8; + if constexpr (sizeof(T) * CHAR_BIT >= 32) { + if (x & T(0x0000FFFF0000FFFF)) c -= 16; + } + if constexpr (sizeof(T) * CHAR_BIT >= 16) { + if (x & T(0x00FF00FF00FF00FF)) c -= 8; + } if (x & T(0x0F0F0F0F0F0F0F0F)) c -= 4; if (x & T(0x3333333333333333)) c -= 2; if (x & T(0x5555555555555555)) c -= 1; @@ -80,6 +91,24 @@ constexpr inline T ctz(T x) noexcept { } // namespace details +constexpr inline UTILS_PUBLIC UTILS_PURE +unsigned int UTILS_ALWAYS_INLINE clz(unsigned char x) noexcept { +#if __has_builtin(__builtin_clz) + return __builtin_clz((unsigned int)x) - 24; +#else + return 
details::clz(x); +#endif +} + +constexpr inline UTILS_PUBLIC UTILS_PURE +unsigned int UTILS_ALWAYS_INLINE clz(unsigned short x) noexcept { +#if __has_builtin(__builtin_clz) + return __builtin_clz((unsigned int)x) - 16; +#else + return details::clz(x); +#endif +} + constexpr inline UTILS_PUBLIC UTILS_PURE unsigned int UTILS_ALWAYS_INLINE clz(unsigned int x) noexcept { #if __has_builtin(__builtin_clz) @@ -107,6 +136,24 @@ unsigned long long UTILS_ALWAYS_INLINE clz(unsigned long long x) noexcept { #endif } +constexpr inline UTILS_PUBLIC UTILS_PURE +unsigned int UTILS_ALWAYS_INLINE ctz(unsigned char x) noexcept { +#if __has_builtin(__builtin_ctz) + return __builtin_ctz(x); +#else + return details::ctz(x); +#endif +} + +constexpr inline UTILS_PUBLIC UTILS_PURE +unsigned int UTILS_ALWAYS_INLINE ctz(unsigned short x) noexcept { +#if __has_builtin(__builtin_ctz) + return __builtin_ctz(x); +#else + return details::ctz(x); +#endif +} + constexpr inline UTILS_PUBLIC UTILS_PURE unsigned int UTILS_ALWAYS_INLINE ctz(unsigned int x) noexcept { #if __has_builtin(__builtin_ctz) @@ -134,6 +181,24 @@ unsigned long long UTILS_ALWAYS_INLINE ctz(unsigned long long x) noexcept { #endif } +constexpr inline UTILS_PUBLIC UTILS_PURE +unsigned int UTILS_ALWAYS_INLINE popcount(unsigned char x) noexcept { +#if __has_builtin(__builtin_popcount) + return __builtin_popcount(x); +#else + return details::popcount(x); +#endif +} + +constexpr inline UTILS_PUBLIC UTILS_PURE +unsigned int UTILS_ALWAYS_INLINE popcount(unsigned short x) noexcept { +#if __has_builtin(__builtin_popcount) + return __builtin_popcount(x); +#else + return details::popcount(x); +#endif +} + constexpr inline UTILS_PUBLIC UTILS_PURE unsigned int UTILS_ALWAYS_INLINE popcount(unsigned int x) noexcept { #if __has_builtin(__builtin_popcount) @@ -161,11 +226,6 @@ unsigned long long UTILS_ALWAYS_INLINE popcount(unsigned long long x) noexcept { #endif } -constexpr inline UTILS_PUBLIC UTILS_PURE -uint8_t UTILS_ALWAYS_INLINE 
popcount(uint8_t x) noexcept { - return (uint8_t)popcount((unsigned int)x); -} - template::value && std::is_unsigned::value>> constexpr inline UTILS_PUBLIC UTILS_PURE diff --git a/libs/utils/include/utils/android/PerformanceHintManager.h b/libs/utils/include/utils/android/PerformanceHintManager.h new file mode 100644 index 00000000000..9808186475c --- /dev/null +++ b/libs/utils/include/utils/android/PerformanceHintManager.h @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2024 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef TNT_UTILS_ANDROID_PERFORMANCEHINTMANAGER_H +#define TNT_UTILS_ANDROID_PERFORMANCEHINTMANAGER_H + +#include +#include + +#include +#include + +namespace utils { + +namespace details { +struct PerformanceHintManager; +} // namespace details + +class UTILS_PUBLIC PerformanceHintManager : + private PrivateImplementation { + friend struct details::PerformanceHintManager; + struct SessionDetails; + +public: + class UTILS_PUBLIC Session : PrivateImplementation { + friend class PerformanceHintManager; + friend struct PerformanceHintManager::SessionDetails; + public: + Session() noexcept; + Session(PerformanceHintManager& manager, + int32_t const* threadIds, size_t size, + int64_t initialTargetWorkDurationNanos) noexcept; + ~Session() noexcept; + + Session(Session&& rhs) noexcept; + Session& operator=(Session&& rhs) noexcept; + Session(Session const& rhs) = delete; + Session& operator=(Session const& rhs) = delete; + + bool isValid() const; + int updateTargetWorkDuration(int64_t targetDurationNanos) noexcept; + int reportActualWorkDuration(int64_t actualDurationNanos) noexcept; + }; + + PerformanceHintManager() noexcept; + ~PerformanceHintManager() noexcept; + + bool isValid() const; + + int64_t getPreferredUpdateRateNanos() const noexcept; +}; + +} // namespace utils + +#endif //TNT_UTILS_ANDROID_PERFORMANCEHINTMANAGER_H diff --git a/libs/utils/src/android/PerformanceHintManager.cpp b/libs/utils/src/android/PerformanceHintManager.cpp new file mode 100644 index 00000000000..c2c8f8b8f15 --- /dev/null +++ b/libs/utils/src/android/PerformanceHintManager.cpp @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2024 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include + +#include +#include + +#define UTILS_PRIVATE_IMPLEMENTATION_NON_COPYABLE +#include + +namespace utils { + +namespace details { +struct PerformanceHintManager { + APerformanceHintManager* mManager = nullptr; +}; +} // namespace details + + +struct PerformanceHintManager::SessionDetails { + APerformanceHintSession* mSession = nullptr; +}; + +PerformanceHintManager::PerformanceHintManager() noexcept { + if (__builtin_available(android __ANDROID_API_T__, *)) { + mImpl->mManager = APerformanceHint_getManager(); + } +} + +PerformanceHintManager::~PerformanceHintManager() noexcept = default; + +bool PerformanceHintManager::isValid() const { + return mImpl->mManager != nullptr; +} + +int64_t PerformanceHintManager::getPreferredUpdateRateNanos() const noexcept { + if (__builtin_available(android __ANDROID_API_T__, *)) { + if (UTILS_LIKELY(mImpl->mManager)) { + return APerformanceHint_getPreferredUpdateRateNanos(mImpl->mManager); + } + } + return -1; +} + +// ------------------------------------------------------------------------------------------------ + +PerformanceHintManager::Session::Session() noexcept = default; + +PerformanceHintManager::Session::Session(PerformanceHintManager& manager, int32_t const* threadIds, + size_t size, int64_t initialTargetWorkDurationNanos) noexcept { + if (__builtin_available(android __ANDROID_API_T__, *)) { + if (UTILS_LIKELY(manager.isValid())) { + mImpl->mSession = APerformanceHint_createSession( + manager->mManager, threadIds, size, initialTargetWorkDurationNanos); + } 
+ } +} + +PerformanceHintManager::Session::Session(Session&& rhs) noexcept = default; + +PerformanceHintManager::Session& PerformanceHintManager::Session::operator=(Session&& rhs) noexcept = default; + +PerformanceHintManager::Session::~Session() noexcept { + if (__builtin_available(android __ANDROID_API_T__, *)) { + if (UTILS_LIKELY(mImpl->mSession)) { + APerformanceHint_closeSession(mImpl->mSession); + } + } +} + +bool PerformanceHintManager::Session::isValid() const { + return mImpl->mSession != nullptr; +} + +int PerformanceHintManager::Session::updateTargetWorkDuration( + int64_t targetDurationNanos) noexcept { + if (__builtin_available(android __ANDROID_API_T__, *)) { + if (UTILS_LIKELY(mImpl->mSession)) { + return APerformanceHint_updateTargetWorkDuration(mImpl->mSession, targetDurationNanos); + } + } + return -1; +} + +int PerformanceHintManager::Session::reportActualWorkDuration( + int64_t actualDurationNanos) noexcept { + if (__builtin_available(android __ANDROID_API_T__, *)) { + if (UTILS_LIKELY(mImpl->mSession)) { + return APerformanceHint_updateTargetWorkDuration(mImpl->mSession, actualDurationNanos); + } + } + return -1; +} + +} // namespace utils + diff --git a/libs/utils/test/test_algorithm.cpp b/libs/utils/test/test_algorithm.cpp index fa36aff0dce..83dcc7b65bc 100644 --- a/libs/utils/test/test_algorithm.cpp +++ b/libs/utils/test/test_algorithm.cpp @@ -20,6 +20,8 @@ #include +#include + using namespace utils; template @@ -58,6 +60,20 @@ TEST(AlgorithmTest, clz) { EXPECT_EQ(j, details::clz(i|1)); EXPECT_EQ(j, count_leading_zeros(i)); } + for (uint16_t i = 1, j = 15; j < 16; i *= 2, j--) { + EXPECT_EQ(j, clz(i)); + EXPECT_EQ(j, clz(uint16_t(i|1))); + EXPECT_EQ(j, details::clz(i)); + EXPECT_EQ(j, details::clz(uint16_t(i|1))); + EXPECT_EQ(j, count_leading_zeros(i)); + } + for (uint8_t i = 1, j = 7; j < 8; i *= 2, j--) { + EXPECT_EQ(j, clz(i)); + EXPECT_EQ(j, clz(uint8_t(i|1))); + EXPECT_EQ(j, details::clz(i)); + EXPECT_EQ(j, details::clz(uint8_t(i|1))); 
+ EXPECT_EQ(j, count_leading_zeros(i)); + } } TEST(AlgorithmTest, details_ctz) { @@ -80,6 +96,16 @@ TEST(AlgorithmTest, ctz) { EXPECT_EQ(j, details::ctz(i)); EXPECT_EQ(j, count_trailing_zeros(i)); } + for (uint16_t i = 1, j = 0; j < 16; i *= 2, j++) { + EXPECT_EQ(j, ctz(i)); + EXPECT_EQ(j, details::ctz(i)); + EXPECT_EQ(j, count_trailing_zeros(i)); + } + for (uint8_t i = 1, j = 0; j < 8; i *= 2, j++) { + EXPECT_EQ(j, ctz(i)); + EXPECT_EQ(j, details::ctz(i)); + EXPECT_EQ(j, count_trailing_zeros(i)); + } } TEST(AlgorithmTest, details_popcount) { diff --git a/web/filament-js/package.json b/web/filament-js/package.json index df1e4ffbd88..9523529679f 100644 --- a/web/filament-js/package.json +++ b/web/filament-js/package.json @@ -1,6 +1,6 @@ { "name": "filament", - "version": "1.51.6", + "version": "1.51.7", "description": "Real-time physically based rendering engine", "main": "filament.js", "module": "filament.js",