diff --git a/CMakeLists.txt b/CMakeLists.txt index 4840f174f..8030b9727 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ cmake_minimum_required (VERSION 3.12) -set (OSL_VERSION "1.13.3.0") +set (OSL_VERSION "1.13.4.0") set (OSL_VERSION_OVERRIDE "" CACHE STRING "Version override (use with caution)!") mark_as_advanced (OSL_VERSION_OVERRIDE) diff --git a/src/cmake/testing.cmake b/src/cmake/testing.cmake index 642a88e49..526a055e5 100644 --- a/src/cmake/testing.cmake +++ b/src/cmake/testing.cmake @@ -311,7 +311,8 @@ macro (osl_add_all_tests) pragma-nowarn printf-reg printf-whole-array - raytype raytype-reg raytype-specialized regex-reg reparam + raytype raytype-reg raytype-specialized regex-reg + reparam reparam-arrays render-background render-bumptest render-cornell render-furnace-diffuse render-mx-furnace-burley-diffuse diff --git a/src/include/OSL/device_ptr.h b/src/include/OSL/device_ptr.h new file mode 100644 index 000000000..9eaab7634 --- /dev/null +++ b/src/include/OSL/device_ptr.h @@ -0,0 +1,72 @@ +// Copyright Contributors to the Open Shading Language project. +// SPDX-License-Identifier: BSD-3-Clause +// https://github.com/AcademySoftwareFoundation/OpenShadingLanguage + +#pragma once + +#include + + + +OSL_NAMESPACE_ENTER + + +/// Wrapper class for holding a "device" pointer -- GPU or whatnot. It +/// provides protections so that the pointer cannot easily be accessed on the +/// host side, where presumably it would not be valid memory. +template class device_ptr { +public: + device_ptr() = default; + + // Copy ctr from another device_ptr of the same type + device_ptr(const device_ptr& other) = default; + + /// On device, device_ptr can construct from a pointer. + /// On host, it must be explicitly constructed -- use with caution. +#ifdef __CUDA_ARCH__ + device_ptr(T* ptr) : m_ptr(ptr) {} +#else + explicit device_ptr(T* ptr) : m_ptr(ptr) {} +#endif + +#ifdef __CUDA_ARCH__ + /// On device, act as a pointer. None of these things are allowed on the + /// host. + T* operator->() const + { + return m_ptr; + } + T& operator*() const + { + return *m_ptr; + } +#endif + + /// Extract the raw device-side pointer. Use with caution! On the host, + /// this will not point to valid memory. + T* d_get() const + { + return m_ptr; + } + + /// Evaluate as bool is a null pointer check. + operator bool() const noexcept + { + return m_ptr != nullptr; + } + + /// Reset the pointer to `dptr`, which must be a device-side raw pointer, + /// or null. Since this device_ptr is non-owning, any previous value is + /// simply overwritten. + void reset(T* dptr = nullptr) + { + m_ptr = dptr; + } + +private: + T* m_ptr = nullptr; // underlying pointer, initializes to null +}; + + + +OSL_NAMESPACE_EXIT diff --git a/src/include/OSL/oslconfig.h.in b/src/include/OSL/oslconfig.h.in index 4a582be05..bdd962908 100644 --- a/src/include/OSL/oslconfig.h.in +++ b/src/include/OSL/oslconfig.h.in @@ -241,6 +241,7 @@ enum class SymArena { Heap, // Belongs to context heap Outputs, // Belongs to output arena UserData, // UserData arena + Interactive, // Interactively edited variables // ShaderGlobals, // RESERVED }; diff --git a/src/include/OSL/oslexec.h b/src/include/OSL/oslexec.h index 2199006bc..f4c3576e8 100644 --- a/src/include/OSL/oslexec.h +++ b/src/include/OSL/oslexec.h @@ -509,6 +509,14 @@ class OSLEXECPUBLIC ShadingSystem { /// string pickle Retrieves a serialized representation /// of the shader group declaration. /// int llvm_groupdata_size Size of the GroupData struct. + /// ptr interactive_params Pointer to the memory block containing + /// host-side interactive parameter values + /// for this shader group. + /// ptr device_interactive_params + /// Pointer to the memory block containing + /// device-side interactive parameter values + /// for this shader group. + /// /// Note: the attributes referred to as "string" are actually on the app /// side as ustring or const char* (they have the same data layout), NOT /// std::string! diff --git a/src/include/OSL/rendererservices.h b/src/include/OSL/rendererservices.h index fe37bd4fd..11ac04f37 100644 --- a/src/include/OSL/rendererservices.h +++ b/src/include/OSL/rendererservices.h @@ -400,18 +400,44 @@ class OSLEXECPUBLIC RendererServices { /// Return a pointer to the texture system (if available). virtual TextureSystem* texturesys() const; - virtual uint64_t register_global(const std::string& var_name, - uint64_t value) + /// Allocate `size` bytes of memory on the device that will execute the + /// shaders. (Equivalent to malloc() on the CPU.) + virtual void* device_alloc(size_t size) { - return 0; + return nullptr; + // Note: for an OptiX-based renderer, this method should be overriden + // with something like: + // + // void* dptr; + // auto r = cudaMalloc(&dptr, size); + // return r == cudaSuccess ? dptr : nullptr; } - virtual bool fetch_global(const std::string& var_name, uint64_t* value) + /// Free a previous allocation (by `device_alloc()`) on the device that + /// will execute the shaders. (Equivalent to free() on the CPU.) + virtual void device_free(void* ptr) { - return false; + // Note: for an OptiX-based renderer, this method should be overriden + // with something like: + // + // cudaFree(ptr); } - + /// Copy `size` bytes from location `src_host` on the host/CPU (the + /// machine making this call) into location `dst_device` on the device + /// executing shaders. (Equivalent to `memcpy(dst, src, size)` on the + /// CPU.) + virtual void* copy_to_device(void* dst_device, const void* src_host, + size_t size) + { + return nullptr; + // Note: for an OptiX-based renderer, this method should be overriden + // with something like: + // + // auto r = cudaMemcpy(dst_device, src_host, size, + // cudaMemcpyHostToDevice); + // return dst_device; + } /// Options we use for noise calls. struct NoiseOpt { diff --git a/src/include/optix_compat.h b/src/include/optix_compat.h index bf54c7215..05a959363 100644 --- a/src/include/optix_compat.h +++ b/src/include/optix_compat.h @@ -18,6 +18,7 @@ # include #endif +#include #if !OSL_USE_OPTIX && !defined(__CUDA_ARCH__) using CUdeviceptr = void*; diff --git a/src/include/osl_pvt.h b/src/include/osl_pvt.h index bbf667ab4..cbb1f7637 100644 --- a/src/include/osl_pvt.h +++ b/src/include/osl_pvt.h @@ -632,7 +632,18 @@ class Symbol { OSL_ASSERT(arena == SymArena::Absolute); m_arena = static_cast(arena); m_data = ptr; - // m_dataoffset = static_cast((char*)ptr - (char*)0); + } + + /// Specify the location of the symbol's data, relative to an arena + /// (which for now must be Absolute). + void set_dataptr(SymArena arena, void* ptr, int offset) + { + // OSL_ASSERT(arena == SymArena::Absolute); + m_arena = static_cast(arena); + m_data = ptr; + m_dataoffset = offset; + OSL::print("setting sym {} arena {} offset {}\n", name(), int(m_arena), + m_dataoffset); } diff --git a/src/liboslexec/backendllvm.cpp b/src/liboslexec/backendllvm.cpp index 4ec18c49c..04967e1c2 100644 --- a/src/liboslexec/backendllvm.cpp +++ b/src/liboslexec/backendllvm.cpp @@ -226,6 +226,15 @@ BackendLLVM::getLLVMSymbolBase(const Symbol& sym) llvm_type(sym.typespec().elementtype())); return result; } + if (sym.symtype() == SymTypeParam && sym.interactive()) { + // Special case for interactively-edited parameters -- they live in + // the interactive data block for the group. + // Generate the pointer to this symbol by offsetting into the + // interactive data block. + int offset = group().interactive_param_offset(layer(), sym.name()); + return ll.offset_ptr(m_llvm_interactive_params_ptr, offset, + llvm_ptr_type(sym.typespec().elementtype())); + } if (sym.symtype() == SymTypeParam || sym.symtype() == SymTypeOutputParam) { // Special case for params -- they live in the group data @@ -334,14 +343,7 @@ BackendLLVM::getOrAllocateCUDAVariable(const Symbol& sym) OSL_ASSERT(use_optix() && "This function is only supported when using OptiX!"); - // We need to sanitize the symbol name for PTX compatibility. Also, if the - // sym name starts with a dollar sign, which are not allowed in PTX - // variable names, then prepend another underscore. - std::string sym_name = Strutil::replace(sym.name(), ".", "_", true); - - std::string name - = fmtformat("{}{}_{}_{}_{}", sym_name.front() == '$' ? "_" : "", - sym_name, group().name(), inst()->layername(), sym.layer()); + std::string name = global_unique_symname(sym); // Return the Value if it has already been allocated auto it = get_const_map().find(name); diff --git a/src/liboslexec/backendllvm.h b/src/liboslexec/backendllvm.h index fe0caa209..16e1a583a 100644 --- a/src/liboslexec/backendllvm.h +++ b/src/liboslexec/backendllvm.h @@ -217,6 +217,20 @@ class BackendLLVM final : public OSOProcessorBase { llvm::Value* getOrAllocateLLVMSymbol(const Symbol& sym); #if OSL_USE_OPTIX + /// Return a globally unique (to the JIT module) name for symbol `sym`, + /// assuming it's part of the currently examined layer of the group. + std::string global_unique_symname(const Symbol& sym) + { + // We need to sanitize the symbol name for PTX compatibility. Also, if + // the sym name starts with a dollar sign, which are not allowed in + // PTX variable names, then prepend another underscore. + auto sym_name = Strutil::replace(sym.name(), ".", "_", true); + int layer = sym.layer(); + const ShaderInstance* inst_ = group()[layer]; + return fmtformat("{}{}_{}_{}_{}", sym_name.front() == '$' ? "_" : "", + sym_name, group().name(), inst_->layername(), layer); + } + /// Allocate a CUDA variable for the given OSL symbol and return a pointer /// to the corresponding LLVM GlobalVariable, or return the pointer if it /// has already been allocated. @@ -449,6 +463,13 @@ class BackendLLVM final : public OSOProcessorBase { return ll.llvm_type(llvm_typedesc(typespec)); } + /// Generate the appropriate llvm type definition for a pointer to + /// the type specified by the TypeSpec. + llvm::Type* llvm_ptr_type(const TypeSpec& typespec) + { + return ll.type_ptr(ll.llvm_type(llvm_typedesc(typespec))); + } + /// Generate the parameter-passing llvm type definition for an OSL /// TypeSpec. llvm::Type* llvm_pass_type(const TypeSpec& typespec); @@ -559,6 +580,7 @@ class BackendLLVM final : public OSOProcessorBase { std::map m_param_order_map; llvm::Value* m_llvm_shaderglobals_ptr; llvm::Value* m_llvm_groupdata_ptr; + llvm::Value* m_llvm_interactive_params_ptr; llvm::Value* m_llvm_userdata_base_ptr; llvm::Value* m_llvm_output_base_ptr; llvm::Value* m_llvm_shadeindex; diff --git a/src/liboslexec/batched_backendllvm.cpp b/src/liboslexec/batched_backendllvm.cpp index 07a582036..f74021f4b 100644 --- a/src/liboslexec/batched_backendllvm.cpp +++ b/src/liboslexec/batched_backendllvm.cpp @@ -448,6 +448,16 @@ BatchedBackendLLVM::getLLVMSymbolBase(const Symbol& sym) return result; } + if (sym.symtype() == SymTypeParam && sym.interactive()) { + // Special case for interactively-edited parameters -- they live in + // the interactive data block for the group. + // Generate the pointer to this symbol by offsetting into the + // interactive data block. + int offset = group().interactive_param_offset(layer(), sym.name()); + return ll.offset_ptr(m_llvm_interactive_params_ptr, offset, + llvm_ptr_type(sym.typespec().elementtype())); + } + if (sym.symtype() == SymTypeParam || sym.symtype() == SymTypeOutputParam) { // Special case for params -- they live in the group data int fieldnum = m_param_order_map[&sym]; diff --git a/src/liboslexec/batched_backendllvm.h b/src/liboslexec/batched_backendllvm.h index f10e8de56..3d316b173 100644 --- a/src/liboslexec/batched_backendllvm.h +++ b/src/liboslexec/batched_backendllvm.h @@ -6,8 +6,6 @@ #pragma once #include -#include -#include #include #include "oslexec_pvt.h" @@ -15,7 +13,7 @@ using namespace OSL; using namespace OSL::pvt; -#include "OSL/llvm_util.h" +#include #include "runtimeoptimize.h" #include @@ -658,6 +656,14 @@ class BatchedBackendLLVM : public OSOProcessorBase { return ll.llvm_vector_type(llvm_typedesc(typespec)); } + /// Generate the appropriate llvm type definition for a pointer to + /// the type specified by the TypeSpec. + llvm::Type* llvm_ptr_type(const TypeSpec& typespec) + { + return reinterpret_cast( + ll.type_ptr(ll.llvm_type(llvm_typedesc(typespec)))); + } + /// Generate the parameter-passing llvm type definition for an OSL /// TypeSpec. llvm::Type* llvm_pass_type(const TypeSpec& typespec); @@ -783,7 +789,7 @@ class BatchedBackendLLVM : public OSOProcessorBase { std::map m_param_order_map; llvm::Value* m_llvm_shaderglobals_ptr; llvm::Value* m_llvm_groupdata_ptr; - + llvm::Value* m_llvm_interactive_params_ptr; llvm::Value* m_llvm_wide_shadeindex_ptr; llvm::Value* m_llvm_userdata_base_ptr; llvm::Value* m_llvm_output_base_ptr; diff --git a/src/liboslexec/batched_llvm_gen.cpp b/src/liboslexec/batched_llvm_gen.cpp index dcd37da1f..19cbb4ead 100644 --- a/src/liboslexec/batched_llvm_gen.cpp +++ b/src/liboslexec/batched_llvm_gen.cpp @@ -92,13 +92,17 @@ BatchedBackendLLVM::llvm_call_layer(int layer, bool unconditional) << " unconditional=" << unconditional << std::endl); // Make code that looks like: // if (! groupdata->run[parentlayer]) - // parent_layer (sg, groupdata); + // parent_layer (sg, groupdata, wide_shadeindex_ptr, + // userdata_base_ptr, output_base_ptr, + // execution_mask, interactive_params_ptr); // if it's a conditional call, or - // parent_layer (sg, groupdata); + // parent_layer (sg, groupdata, wide_shadeindex_ptr, + // userdata_base_ptr, output_base_ptr, + // execution_mask, interactive_params_ptr); // if it's run unconditionally. // The code in the parent layer itself will set its 'executed' flag. - llvm::Value* args[6]; + llvm::Value* args[7]; args[0] = sg_ptr(); args[1] = groupdata_ptr(); args[2] = ll.void_ptr(wide_shadeindex_ptr()); @@ -120,13 +124,11 @@ BatchedBackendLLVM::llvm_call_layer(int layer, bool unconditional) llvm::Value* execution_required = ll.op_ne(lanes_requiring_execution_value, ll.constant(0)); then_block = ll.new_basic_block( - llvm_debug() - ? std::string("then layer ").append(std::to_string(layer)) - : std::string()); + llvm_debug() ? Strutil::fmt::format("then layer {}", layer) + : std::string()); after_block = ll.new_basic_block( - llvm_debug() - ? std::string("after layer ").append(std::to_string(layer)) - : std::string()); + llvm_debug() ? Strutil::fmt::format("after layer {}", layer) + : std::string()); ll.op_branch(execution_required, then_block, after_block); // insert point is now then_block } else { @@ -134,6 +136,7 @@ BatchedBackendLLVM::llvm_call_layer(int layer, bool unconditional) } args[5] = lanes_requiring_execution_value; + args[6] = m_llvm_interactive_params_ptr; // Before the merge, keeping in case we broke it //std::string name = fmtformat("{}_{}_{}", m_library_selector, parent->layername().c_str(), @@ -247,7 +250,7 @@ LLVMGEN(llvm_gen_useparam) // initializing them lazily, now we have to do it. if ((sym.symtype() == SymTypeParam || sym.symtype() == SymTypeOutputParam) - && !sym.lockgeom() && !sym.typespec().is_closure() + && sym.interpolated() && !sym.typespec().is_closure() && !sym.connected() && !sym.connected_down() && rop.shadingsys().lazy_userdata()) { rop.llvm_assign_initial_value(sym, rop.ll.mask_as_int( diff --git a/src/liboslexec/batched_llvm_instance.cpp b/src/liboslexec/batched_llvm_instance.cpp index c59b22ceb..de591aea7 100644 --- a/src/liboslexec/batched_llvm_instance.cpp +++ b/src/liboslexec/batched_llvm_instance.cpp @@ -63,8 +63,19 @@ Schematically, we want to create code that resembles the following: float param_1_bar; }; + // Data for the interactively adjusted parameters of this group -- these + // can't be turned into constants because the app may want to modify them + // as it runs (such as for user interaction). This block of memory has + // one global copy specific each the shader group, managed by OSL. + struct InteractiveParams { + float iparam_0_baz; + }; + // Name of layer entry is $layer_ID - void $layer_0 (ShaderGlobals *sg, GroupData_1 *group) + void $layer_0(ShaderGlobals *sg, GroupData_1 *group, + wide_int* wide_shadeindex_ptr, + void* userdatda_base_ptr, void* output_base_ptr, + int mask, InteractiveParams* interactive_params) { // Declare locals, temps, constants, params with known values. // Make them all look like stack memory locations: @@ -74,9 +85,14 @@ Schematically, we want to create code that resembles the following: // then run the shader body: *x = sg->u * group->param_0_bar; group->param_1_foo = *x; + *x += interactive_params->iparam_0_baz; + // ... } - void $layer_1 (ShaderGlobals *sg, GroupData_1 *group) + void $layer_1(ShaderGlobals *sg, GroupData_1 *group, + wide_int* wide_shadeindex_ptr, + void* userdatda_base_ptr, void* output_base_ptr, + int mask, InteractiveParams* interactive_params) { // Because we need the outputs of layer 0 now, we call it if it // hasn't already run: @@ -87,7 +103,10 @@ Schematically, we want to create code that resembles the following: *y = sg->u * group->$param_1_bar; } - void $group_1 (ShaderGlobals *sg, GroupData_1 *group) + void $group_1(ShaderGlobals *sg, GroupData_1 *group, + wide_int* wide_shadeindex_ptr, + void* userdatda_base_ptr, void* output_base_ptr, + int mask, InteractiveParams* interactive_params) { group->layer_run[...] = 0; // Run just the unconditional layers @@ -901,14 +920,13 @@ BatchedBackendLLVM::llvm_type_groupdata() if (offset & (align - 1)) offset += align - (offset & (align - 1)); if (llvm_debug() >= 2) - std::cout << " " << inst->layername() << " (" << inst->id() - << ") " << sym.mangled() << " " << ts.c_str() - << ", field " << order << ", size " - << derivSize * int(sym.size()) << ", offset " - << offset << std::endl; + print(" {} ({}) {} {}, field {}, size {}, offset {}{}{}\n", + inst->layername(), inst->id(), sym.mangled(), ts.c_str(), + order, derivSize * int(sym.size()), offset, + sym.interpolated() ? " (interpolated)" : "", + sym.interactive() ? " (interactive)" : ""); sym.wide_dataoffset((int)offset); offset += derivSize * int(sym.size()) * m_width; - m_param_order_map[&sym] = order; ++order; } @@ -1041,10 +1059,10 @@ BatchedBackendLLVM::llvm_assign_initial_value( llvm_assign_zero(sym); return; // we're done, the parts below are just for params } - ASSERT_MSG(sym.symtype() == SymTypeParam - || sym.symtype() == SymTypeOutputParam, - "symtype was %d, data type was %s", (int)sym.symtype(), - sym.typespec().c_str()); + OSL_ASSERT_MSG(sym.symtype() == SymTypeParam + || sym.symtype() == SymTypeOutputParam, + "symtype was %d, data type was %s", (int)sym.symtype(), + sym.typespec().c_str()); // Handle interpolated params by calling osl_bind_interpolated_param, // which will check if userdata is already retrieved, if not it will @@ -1059,7 +1077,7 @@ BatchedBackendLLVM::llvm_assign_initial_value( bool partial_userdata_mask_was_pushed = false; const SymLocationDesc* symloc = nullptr; LLVM_Util::ScopedMasking partial_data_masking_scope; - if (!sym.lockgeom() && !sym.typespec().is_closure()) { + if (sym.interpolated() && !sym.typespec().is_closure()) { ustring symname = sym.name(); TypeDesc type = sym.typespec().simpletype(); @@ -1217,6 +1235,15 @@ BatchedBackendLLVM::llvm_assign_initial_value( // Handle init ops. build_llvm_code(sym.initbegin(), sym.initend()); +#if 0 /* Is this needed? */ + } else if (sym.interpolated() && !sym.typespec().is_closure()) { + // geometrically-varying param; memcpy its default value + TypeDesc t = sym.typespec().simpletype(); + ll.op_memcpy(llvm_void_ptr(sym), ll.constant_ptr(sym.data()), + t.size(), t.basesize() /*align*/); + if (sym.has_derivs()) + llvm_zero_derivs(sym); +#endif } else { // We think the non-memcpy route is preferable as it give the compiler // a chance to optimize constant values Also memcpy would ignoring the @@ -1738,16 +1765,17 @@ BatchedBackendLLVM::build_llvm_init() OSL_ASSERT(m_library_selector); std::string unique_name = fmtformat("{}_group_{}_init", m_library_selector, group().id()); - ll.current_function(ll.make_function( - unique_name, false, - ll.type_void(), // return type - { - llvm_type_sg_ptr(), llvm_type_groupdata_ptr(), - static_cast(ll.type_void_ptr()), // wide_shader_index - static_cast(ll.type_void_ptr()), // userdata_base_ptr - static_cast(ll.type_void_ptr()), // output_base_ptr - ll.type_int() // mask - })); + ll.current_function( + ll.make_function(unique_name, false, + ll.type_void(), // return type + { + llvm_type_sg_ptr(), llvm_type_groupdata_ptr(), + ll.type_void_ptr(), // wide_shader_index + ll.type_void_ptr(), // userdata_base_ptr + ll.type_void_ptr(), // output_base_ptr + ll.type_int(), // mask + ll.type_void_ptr(), // FIXME: interactive params + })); if (ll.debug_is_enabled()) { ustring file_name @@ -1757,14 +1785,20 @@ BatchedBackendLLVM::build_llvm_init() } // Get shader globals and groupdata pointers - m_llvm_shaderglobals_ptr = ll.current_function_arg(0); //arg_it++; - m_llvm_groupdata_ptr = ll.current_function_arg(1); //arg_it++; + m_llvm_shaderglobals_ptr = ll.current_function_arg(0); //arg_it++; + m_llvm_shaderglobals_ptr->setName("shaderglobals_ptr"); + m_llvm_groupdata_ptr = ll.current_function_arg(1); //arg_it++; + m_llvm_groupdata_ptr->setName("groupdata_ptr"); m_llvm_wide_shadeindex_ptr = ll.current_function_arg(2); //arg_it++; - m_llvm_userdata_base_ptr = ll.current_function_arg(3); //arg_it++; - m_llvm_output_base_ptr = ll.current_function_arg(4); //arg_it++; - - // TODO: do we need to utilize the shader mask in the init function? - //llvm::Value * llvm_initial_shader_mask_value = ll.current_function_arg(5); //arg_it++; + m_llvm_wide_shadeindex_ptr->setName("shadeindex"); + m_llvm_userdata_base_ptr = ll.current_function_arg(3); //arg_it++; + m_llvm_userdata_base_ptr->setName("userdata_base_ptr"); + m_llvm_output_base_ptr = ll.current_function_arg(4); //arg_it++; + m_llvm_output_base_ptr->setName("output_base_ptr"); + llvm::Value* llvm_initial_shader_mask_value = ll.current_function_arg(5); + llvm_initial_shader_mask_value->setName("initial_shader_mask"); + m_llvm_interactive_params_ptr = ll.current_function_arg(6); //arg_it++; + m_llvm_interactive_params_ptr->setName("interactive_params_ptr"); // New function, reset temp matrix pointer m_llvm_temp_wide_matrix_ptr = nullptr; @@ -1875,10 +1909,11 @@ BatchedBackendLLVM::build_llvm_instance(bool groupentry) ll.type_void(), // return type { llvm_type_sg_ptr(), llvm_type_groupdata_ptr(), - static_cast(ll.type_void_ptr()), // wide_shader_index - static_cast(ll.type_void_ptr()), // userdata_base_ptr - static_cast(ll.type_void_ptr()), // output_base_ptr - ll.type_int() // mask + ll.type_void_ptr(), // wide_shader_index + ll.type_void_ptr(), // userdata_base_ptr + ll.type_void_ptr(), // output_base_ptr + ll.type_int(), // mask + ll.type_void_ptr(), // FIXME: interactive params })); if (ll.debug_is_enabled()) { @@ -1888,14 +1923,20 @@ BatchedBackendLLVM::build_llvm_instance(bool groupentry) } // Get shader globals and groupdata pointers - m_llvm_shaderglobals_ptr = ll.current_function_arg(0); //arg_it++; - m_llvm_groupdata_ptr = ll.current_function_arg(1); //arg_it++; + m_llvm_shaderglobals_ptr = ll.current_function_arg(0); //arg_it++; + m_llvm_shaderglobals_ptr->setName("shaderglobals_ptr"); + m_llvm_groupdata_ptr = ll.current_function_arg(1); //arg_it++; + m_llvm_groupdata_ptr->setName("groupdata_ptr"); m_llvm_wide_shadeindex_ptr = ll.current_function_arg(2); //arg_it++; - m_llvm_userdata_base_ptr = ll.current_function_arg(3); //arg_it++; - m_llvm_output_base_ptr = ll.current_function_arg(4); //arg_it++; - - llvm::Value* llvm_initial_shader_mask_value = ll.current_function_arg( - 5); //arg_it++; + m_llvm_wide_shadeindex_ptr->setName("shadeindex"); + m_llvm_userdata_base_ptr = ll.current_function_arg(3); //arg_it++; + m_llvm_userdata_base_ptr->setName("userdata_base_ptr"); + m_llvm_output_base_ptr = ll.current_function_arg(4); //arg_it++; + m_llvm_output_base_ptr->setName("output_base_ptr"); + llvm::Value* llvm_initial_shader_mask_value = ll.current_function_arg(5); + llvm_initial_shader_mask_value->setName("initial_shader_mask"); + m_llvm_interactive_params_ptr = ll.current_function_arg(6); //arg_it++; + m_llvm_interactive_params_ptr->setName("interactive_params_ptr"); // New function, reset temp matrix pointer m_llvm_temp_wide_matrix_ptr = nullptr; @@ -2070,10 +2111,12 @@ BatchedBackendLLVM::build_llvm_instance(bool groupentry) && !s.renderer_output()) continue; // Skip if it's an interpolated (userdata) parameter and we're - // initializing them lazily. - if (s.symtype() == SymTypeParam && !s.lockgeom() - && !s.typespec().is_closure() && !s.connected() - && !s.connected_down() && shadingsys().lazy_userdata()) + // initializing them lazily, or if it's an interactively-adjusted + // parameter. + if (s.symtype() == SymTypeParam && !s.typespec().is_closure() + && !s.connected() && !s.connected_down() + && (s.interactive() + || (s.interpolated() && shadingsys().lazy_userdata()))) continue; // Set initial value for params (may contain init ops) llvm_assign_initial_value(s, llvm_initial_shader_mask_value); diff --git a/src/liboslexec/context.cpp b/src/liboslexec/context.cpp index 2ff919f8e..36e48754b 100644 --- a/src/liboslexec/context.cpp +++ b/src/liboslexec/context.cpp @@ -133,7 +133,7 @@ ShadingContext::execute_init(ShaderGroup& sgroup, int shadeindex, ssg.renderer = renderer(); ssg.Ci = NULL; run_func(&ssg, m_heap.get(), userdata_base_ptr, output_base_ptr, - shadeindex); + shadeindex, sgroup.interactive_arena_ptr()); } if (profile) @@ -160,8 +160,8 @@ ShadingContext::execute_layer(int shadeindex, ShaderGlobals& ssg, if (!run_func) return false; - run_func(&ssg, m_heap.get(), userdata_base_ptr, output_base_ptr, - shadeindex); + run_func(&ssg, m_heap.get(), userdata_base_ptr, output_base_ptr, shadeindex, + group()->interactive_arena_ptr()); if (profile) m_ticks += timer.ticks(); @@ -321,7 +321,8 @@ ShadingContext::Batched::execute_init( run_mask.set_count_on(batch_size); run_func(&bsg, context().m_heap.get(), &wide_shadeindex.data(), - userdata_base_ptr, output_base_ptr, run_mask.value()); + userdata_base_ptr, output_base_ptr, run_mask.value(), + sgroup.interactive_arena_ptr()); } } @@ -360,7 +361,8 @@ ShadingContext::Batched::execute_layer( run_mask.set_count_on(batch_size); run_func(&bsg, context().m_heap.get(), &wide_shadeindex.data(), - userdata_base_ptr, output_base_ptr, run_mask.value()); + userdata_base_ptr, output_base_ptr, run_mask.value(), + group()->interactive_arena_ptr()); } if (profile) diff --git a/src/liboslexec/instance.cpp b/src/liboslexec/instance.cpp index 87e365177..d7bb90dc8 100644 --- a/src/liboslexec/instance.cpp +++ b/src/liboslexec/instance.cpp @@ -732,7 +732,8 @@ ShaderInstance::mergeable(const ShaderInstance& b, -ShaderGroup::ShaderGroup(string_view name) +ShaderGroup::ShaderGroup(string_view name, ShadingSystemImpl& shadingsys) + : m_shadingsys(shadingsys) { m_id = ++(*(atomic_int*)&next_id); if (name.size()) { @@ -745,15 +746,6 @@ ShaderGroup::ShaderGroup(string_view name) -ShaderGroup::ShaderGroup(const ShaderGroup& g, string_view name) - : ShaderGroup(name) // delegate most of the work -{ - m_num_entry_layers = g.m_num_entry_layers; - m_layers = g.m_layers; -} - - - ShaderGroup::~ShaderGroup() { #if 0 @@ -768,6 +760,11 @@ ShaderGroup::~ShaderGroup() << "executed on " << executions() << " points\n"; } #endif + + // Free any GPU memory associated with this group + if (m_device_interactive_arena) + shadingsys().renderer()->device_free( + m_device_interactive_arena.d_get()); } @@ -820,6 +817,34 @@ ShaderGroup::mark_entry_layer(int layer) +void +ShaderGroup::setup_interactive_arena(cspan paramblock) +{ + if (paramblock.size()) { + // CPU side + m_interactive_arena_size = paramblock.size(); + m_interactive_arena.reset(new uint8_t[m_interactive_arena_size]); + memcpy(m_interactive_arena.get(), paramblock.data(), + m_interactive_arena_size); + if (shadingsys().use_optix()) { + // GPU side + RendererServices* rs = shadingsys().renderer(); + m_device_interactive_arena.reset(reinterpret_cast( + rs->device_alloc(m_interactive_arena_size))); + rs->copy_to_device(m_device_interactive_arena.d_get(), + paramblock.data(), m_interactive_arena_size); + // print("group {} has device interactive_params set to {:p}\n", + // name(), m_device_interactive_arena.d_get()); + } + } else { + m_interactive_arena_size = 0; + m_interactive_arena.reset(); + m_device_interactive_arena.reset(); + } +} + + + std::string ShaderGroup::serialize() const { diff --git a/src/liboslexec/llvm_gen.cpp b/src/liboslexec/llvm_gen.cpp index a7d255328..807d7389b 100644 --- a/src/liboslexec/llvm_gen.cpp +++ b/src/liboslexec/llvm_gen.cpp @@ -111,15 +111,16 @@ BackendLLVM::llvm_call_layer(int layer, bool unconditional) // Make code that looks like: // if (! groupdata->run[parentlayer]) // parent_layer (sg, groupdata, userdata_base_ptr, - // output_base_ptr, shadeindex); + // output_base_ptr, shadeindex, interactive_params); // if it's a conditional call, or // parent_layer (sg, groupdata, userdata_base_ptr, - // output_base_ptr, shadeindex); + // output_base_ptr, shadeindex, interactive_params); // if it's run unconditionally. // The code in the parent layer itself will set its 'executed' flag. - llvm::Value* args[] = { sg_ptr(), groupdata_ptr(), userdata_base_ptr(), - output_base_ptr(), shadeindex() }; + llvm::Value* args[] + = { sg_ptr(), groupdata_ptr(), userdata_base_ptr(), + output_base_ptr(), shadeindex(), m_llvm_interactive_params_ptr }; ShaderInstance* parent = group()[layer]; llvm::Value* trueval = ll.constant_bool(true); @@ -238,7 +239,7 @@ LLVMGEN(llvm_gen_useparam) // initializing them lazily, now we have to do it. if ((sym.symtype() == SymTypeParam || sym.symtype() == SymTypeOutputParam) - && !sym.lockgeom() && !sym.typespec().is_closure() + && sym.interpolated() && !sym.typespec().is_closure() && !sym.connected() && !sym.connected_down() && rop.shadingsys().lazy_userdata()) { rop.llvm_assign_initial_value(sym); diff --git a/src/liboslexec/llvm_instance.cpp b/src/liboslexec/llvm_instance.cpp index 1b31494ae..d82c050e9 100644 --- a/src/liboslexec/llvm_instance.cpp +++ b/src/liboslexec/llvm_instance.cpp @@ -57,10 +57,18 @@ Schematically, we want to create code that resembles the following: float param_1_bar; }; + // Data for the interactively adjusted parameters of this group -- these + // can't be turned into constants because the app may want to modify them + // as it runs (such as for user interaction). This block of memory has + // one global copy specific each the shader group, managed by OSL. + struct InteractiveParams { + float iparam_0_baz; + }; + // Name of layer entry is $layer_ID void $layer_0(ShaderGlobals* sg, GroupData* group, void* userdatda_base_ptr, void* output_base_ptr, - int shadeindex) + int shadeindex, InteractiveParams* interactive_params) { // Declare locals, temps, constants, params with known values. // Make them all look like stack memory locations: @@ -70,25 +78,27 @@ Schematically, we want to create code that resembles the following: // then run the shader body: *x = sg->u * group->param_0_bar; group->param_1_foo = *x; + *x += interactive_params->iparam_0_baz; + // ... } void $layer_1(ShaderGlobals* sg, GroupData* group, void* userdatda_base_ptr, void* output_base_ptr, - int shadeindex) + int shadeindex, InteractiveParams* interactive_params) { // Because we need the outputs of layer 0 now, we call it if it // hasn't already run: if (! group->layer_run[0]) { group->layer_run[0] = 1; $layer_0 (sg, group, userdata_base_ptr, output_base_ptr, - shadeindex); // because we need its outputs + shadeindex, interactive_params); // because we need its outputs } *y = sg->u * group->$param_1_bar; } void $group_1(ShaderGlobals* sg, GroupData* group, void* userdatda_base_ptr, void* output_base_ptr, - int shadeindex) + int shadeindex, InteractiveParams* interactive_params) { group->layer_run[...] = 0; // Run just the unconditional layers @@ -96,7 +106,7 @@ Schematically, we want to create code that resembles the following: if (! group->layer_run[1]) { group->layer_run[1] = 1; $layer_1(sg, group, userdata_base_ptr, output_base_ptr, - shadeindex); + shadeindex, interactive_params); } } @@ -324,8 +334,8 @@ BackendLLVM::llvm_type_groupdata() && !sym.connected_down()) { auto found = group().find_symloc(sym.name()); if (found) - OIIO::Strutil::print("layer {} \"{}\" : OUTPUT {}\n", layer, - inst->layername(), found->name); + print("layer {} \"{}\" : OUTPUT {}\n", layer, + inst->layername(), found->name); } // Alignment @@ -335,11 +345,11 @@ BackendLLVM::llvm_type_groupdata() if (offset & (align - 1)) offset += align - (offset & (align - 1)); if (llvm_debug() >= 2) - std::cout << " " << inst->layername() << " (" << inst->id() - << ") " << sym.mangled() << " " << ts.c_str() - << ", field " << order << ", size " - << derivSize * int(sym.size()) << ", offset " - << offset << std::endl; + print(" {} ({}) {} {}, field {}, size {}, offset {}{}{}\n", + inst->layername(), inst->id(), sym.mangled(), ts.c_str(), + order, derivSize * int(sym.size()), offset, + sym.interpolated() ? " (interpolated)" : "", + sym.interactive() ? " (interactive)" : ""); sym.dataoffset((int)offset); // TODO(arenas): sym.set_dataoffset(SymArena::Heap, offset); offset += derivSize * sym.size(); @@ -468,7 +478,7 @@ BackendLLVM::llvm_assign_initial_value(const Symbol& sym, bool force) // such userdata was available. llvm::BasicBlock* after_userdata_block = nullptr; const SymLocationDesc* symloc = nullptr; - if (!sym.lockgeom() && !sym.typespec().is_closure()) { + if (sym.interpolated() && !sym.typespec().is_closure()) { ustring symname = sym.name(); TypeDesc type = sym.typespec().simpletype(); @@ -566,7 +576,7 @@ BackendLLVM::llvm_assign_initial_value(const Symbol& sym, bool force) if (sym.has_derivs()) llvm_zero_derivs(sym); #endif - } else if (!sym.lockgeom() && !sym.typespec().is_closure()) { + } else if (sym.interpolated() && !sym.typespec().is_closure()) { // geometrically-varying param; memcpy its default value TypeDesc t = sym.typespec().simpletype(); ll.op_memcpy(llvm_void_ptr(sym), ll.constant_ptr(sym.data()), @@ -860,10 +870,13 @@ BackendLLVM::build_llvm_init() ll.current_function( ll.make_function(unique_name, false, ll.type_void(), // return type - { llvm_type_sg_ptr(), llvm_type_groupdata_ptr(), - ll.type_void_ptr(), // userdata_base_ptr - ll.type_void_ptr(), // output_base_ptr - ll.type_int() })); + { + llvm_type_sg_ptr(), llvm_type_groupdata_ptr(), + ll.type_void_ptr(), // userdata_base_ptr + ll.type_void_ptr(), // output_base_ptr + ll.type_int(), + ll.type_void_ptr(), // FIXME: interactive params + })); if (ll.debug_is_enabled()) { ustring sourcefile @@ -873,10 +886,17 @@ BackendLLVM::build_llvm_init() // Get shader globals and groupdata pointers m_llvm_shaderglobals_ptr = ll.current_function_arg(0); //arg_it++; - m_llvm_groupdata_ptr = ll.current_function_arg(1); //arg_it++; + m_llvm_shaderglobals_ptr->setName("shaderglobals_ptr"); + m_llvm_groupdata_ptr = ll.current_function_arg(1); //arg_it++; + m_llvm_groupdata_ptr->setName("groupdata_ptr"); m_llvm_userdata_base_ptr = ll.current_function_arg(2); //arg_it++; - m_llvm_output_base_ptr = ll.current_function_arg(3); //arg_it++; - m_llvm_shadeindex = ll.current_function_arg(4); //arg_it++; + m_llvm_userdata_base_ptr->setName("userdata_base_ptr"); + m_llvm_output_base_ptr = ll.current_function_arg(3); //arg_it++; + m_llvm_output_base_ptr->setName("output_base_ptr"); + m_llvm_shadeindex = ll.current_function_arg(4); //arg_it++; + m_llvm_shadeindex->setName("shadeindex"); + m_llvm_interactive_params_ptr = ll.current_function_arg(5); //arg_it++; + m_llvm_interactive_params_ptr->setName("interactive_params_ptr"); // Set up a new IR builder llvm::BasicBlock* entry_bb = ll.new_basic_block(unique_name); @@ -956,10 +976,13 @@ BackendLLVM::build_llvm_instance(bool groupentry) unique_layer_name, !is_entry_layer, // fastcall for non-entry layer functions ll.type_void(), // return type - { llvm_type_sg_ptr(), llvm_type_groupdata_ptr(), - ll.type_void_ptr(), // userdata_base_ptr - ll.type_void_ptr(), // output_base_ptr - ll.type_int() })); + { + llvm_type_sg_ptr(), llvm_type_groupdata_ptr(), + ll.type_void_ptr(), // userdata_base_ptr + ll.type_void_ptr(), // output_base_ptr + ll.type_int(), + ll.type_void_ptr(), // FIXME: interactive_params + })); if (ll.debug_is_enabled()) { const Opcode& mainbegin(inst()->op(inst()->maincodebegin())); @@ -969,10 +992,17 @@ BackendLLVM::build_llvm_instance(bool groupentry) // Get shader globals and groupdata pointers m_llvm_shaderglobals_ptr = ll.current_function_arg(0); //arg_it++; - m_llvm_groupdata_ptr = ll.current_function_arg(1); //arg_it++; + m_llvm_shaderglobals_ptr->setName("shaderglobals_ptr"); + m_llvm_groupdata_ptr = ll.current_function_arg(1); //arg_it++; + m_llvm_groupdata_ptr->setName("groupdata_ptr"); m_llvm_userdata_base_ptr = ll.current_function_arg(2); //arg_it++; - m_llvm_output_base_ptr = ll.current_function_arg(3); //arg_it++; - m_llvm_shadeindex = ll.current_function_arg(4); //arg_it++; + m_llvm_userdata_base_ptr->setName("userdata_base_ptr"); + m_llvm_output_base_ptr = ll.current_function_arg(3); //arg_it++; + m_llvm_output_base_ptr->setName("output_base_ptr"); + m_llvm_shadeindex = ll.current_function_arg(4); //arg_it++; + m_llvm_shadeindex->setName("shadeindex"); + m_llvm_interactive_params_ptr = ll.current_function_arg(5); //arg_it++; + m_llvm_interactive_params_ptr->setName("interactive_params_ptr"); llvm::BasicBlock* entry_bb = ll.new_basic_block(unique_layer_name); m_exit_instance_block = NULL; @@ -1074,10 +1104,12 @@ BackendLLVM::build_llvm_instance(bool groupentry) && !s.renderer_output()) continue; // Skip if it's an interpolated (userdata) parameter and we're - // initializing them lazily. - if (s.symtype() == SymTypeParam && !s.lockgeom() - && !s.typespec().is_closure() && !s.connected() - && !s.connected_down() && shadingsys().lazy_userdata()) + // initializing them lazily, or if it's an interactively-adjusted + // parameter. + if (s.symtype() == SymTypeParam && !s.typespec().is_closure() + && !s.connected() && !s.connected_down() + && (s.interactive() + || (s.interpolated() && shadingsys().lazy_userdata()))) continue; // Set initial value for params (may contain init ops) llvm_assign_initial_value(s); diff --git a/src/liboslexec/llvm_util.cpp b/src/liboslexec/llvm_util.cpp index bfafcf1d2..3b443f751 100644 --- a/src/liboslexec/llvm_util.cpp +++ b/src/liboslexec/llvm_util.cpp @@ -3532,8 +3532,12 @@ LLVM_Util::offset_ptr(llvm::Value* ptr, llvm::Value* offset, llvm::Value* LLVM_Util::offset_ptr(llvm::Value* ptr, int offset, llvm::Type* ptrtype) { - if (offset == 0) - return ptr; // shortcut for 0 offset + if (offset == 0) { + // shortcut for 0 offset + if (ptrtype && ptrtype != type_void_ptr()) + ptr = ptr_cast(ptr, ptrtype); + return ptr; + } return offset_ptr(ptr, constant(size_t(offset)), ptrtype); } diff --git a/src/liboslexec/oslexec_pvt.h b/src/liboslexec/oslexec_pvt.h index 297b4b6f6..c4f27f57d 100644 --- a/src/liboslexec/oslexec_pvt.h +++ b/src/liboslexec/oslexec_pvt.h @@ -33,6 +33,7 @@ #include "osl_pvt.h" +#include #include #include #include @@ -99,14 +100,16 @@ print_closure(std::ostream& out, const ClosureColor* closure, /// group. typedef void (*RunLLVMGroupFunc)(void* shaderglobals, void* heap_arena_ptr, void* userdata_base_pointer, - void* output_base_pointer, int shadeindex); + void* output_base_pointer, int shadeindex, + void* interactive_params_ptr); #if OSL_USE_BATCHED typedef void (*RunLLVMGroupFuncWide)(void* batchedshaderglobals, void* heap_arena_ptr, const void* wide_shade_index, void* userdata_base_pointer, void* output_base_pointer, - int run_mask_value); + int run_mask_value, + void* interactive_params_ptr); #endif /// Signature of a constant-folding method @@ -1661,8 +1664,7 @@ struct BatchedMessageBuffer { /// ShaderInstance), and the connections among them. class ShaderGroup { public: - ShaderGroup(string_view name); - ShaderGroup(const ShaderGroup& g, string_view name); + ShaderGroup(string_view name, ShadingSystemImpl& shadingsys); ~ShaderGroup(); /// Clear the layers @@ -1692,6 +1694,9 @@ class ShaderGroup { /// Array indexing returns the i-th layer of the group ShaderInstance* operator[](int i) const { return layer(i); } + /// Return a reference to the shading system for this group. + ShadingSystemImpl& shadingsys() const { return m_shadingsys; } + int optimized() const { return m_optimized; } void optimized(int opt) { m_optimized = opt; } @@ -1918,6 +1923,54 @@ class ShaderGroup { return nullptr; } + // Given a data block for interactive params, allocate space for it to + // live with the group and copy the initial data. + void setup_interactive_arena(cspan paramblock); + + uint8_t* interactive_arena_ptr() + { + return m_interactive_arena.get(); + } + + device_ptr& device_interactive_arena() + { + return m_device_interactive_arena; + } + + struct InteractiveParamData { + int layer; + ustring name; + int offset; + + InteractiveParamData(int layer, ustring name, int offset) + : layer(layer), name(name), offset(offset) + { + } + bool operator==(const InteractiveParamData& other) const + { + return layer == other.layer && name == other.name; + } + bool operator<(const InteractiveParamData& other) const + { + return layer < other.layer + || (layer == other.layer && name < other.name); + } + }; + + void add_interactive_param(int layer, ustring name, size_t offset) + { + m_interactive_params.emplace_back(layer, name, + static_cast(offset)); + } + + int interactive_param_offset(int layer, ustring name) + { + for (auto& f : m_interactive_params) + if (f.layer == layer && f.name == name) + return f.offset; + return -1; + } + private: // Put all the things that are read-only (after optimization) and // needed on every shade execution at the front of the struct, as much @@ -1978,6 +2031,14 @@ class ShaderGroup { ustring m_group_use; // "Usage" of group bool m_complete = false; // Successfully ShaderGroupEnd? + ShadingSystemImpl& m_shadingsys; // Back-ptr to the shading system + + // Per-group home for interactively editable parameters + std::vector m_interactive_params; + std::unique_ptr m_interactive_arena; + size_t m_interactive_arena_size = 0; + device_ptr m_device_interactive_arena; + friend class OSL::pvt::ShadingSystemImpl; friend class OSL::pvt::BackendLLVM; #if OSL_USE_BATCHED diff --git a/src/liboslexec/runtimeoptimize.cpp b/src/liboslexec/runtimeoptimize.cpp index 90c5d4d4f..63262a397 100644 --- a/src/liboslexec/runtimeoptimize.cpp +++ b/src/liboslexec/runtimeoptimize.cpp @@ -889,8 +889,10 @@ RuntimeOptimizer::simplify_params() Symbol* s(inst()->symbol(i)); if (s->symtype() != SymTypeParam) continue; // Skip non-params - if (!s->lockgeom()) - continue; // Don't mess with params that can change with the geom + // Don't simplify params that are interpolated or interactively + // editable + if (s->interpolated() || s->interactive()) + continue; if (s->typespec().is_structure() || s->typespec().is_closure_based()) continue; // We don't mess with struct placeholders or closures @@ -967,7 +969,7 @@ RuntimeOptimizer::simplify_params() // examining. ShaderInstance* uplayer = group()[c.srclayer]; Symbol* srcsym = uplayer->symbol(c.src.param); - if (!srcsym->lockgeom()) + if (srcsym->interpolated()) continue; // Not if it can be overridden by geometry // Is the source symbol known to be a global, from @@ -2184,7 +2186,7 @@ RuntimeOptimizer::optimize_ops(int beginop, int endop, if (opnum == inst()->m_maincodebegin) { for (int i = inst()->firstparam(); i < inst()->lastparam(); ++i) { Symbol* s(inst()->symbol(i)); - if (s->symtype() == SymTypeOutputParam && s->lockgeom() + if (s->symtype() == SymTypeOutputParam && !s->interpolated() && (s->valuesource() == Symbol::DefaultVal || s->valuesource() == Symbol::InstanceVal) && !s->has_init_ops() && !s->typespec().is_closure_based() @@ -2491,7 +2493,7 @@ RuntimeOptimizer::resolve_isconnected() s = inst()->symbol(fieldsymid); } bool upconnected = s->connected(); - if (!s->lockgeom() && shadingsys().userdata_isconnected()) + if (s->interpolated() && shadingsys().userdata_isconnected()) upconnected = true; int val = (upconnected ? 1 : 0) + (s->connected_down() ? 2 : 0); turn_into_assign(op, add_constant(TypeDesc::TypeInt, &val), @@ -3251,6 +3253,7 @@ RuntimeOptimizer::run() m_userdata_needed.clear(); m_attributes_needed.clear(); bool does_nothing = true; + std::vector interactive_data; for (int layer = 0; layer < nlayers; ++layer) { set_inst(layer); if (inst()->unused()) @@ -3262,7 +3265,7 @@ RuntimeOptimizer::run() // Find interpolated parameters if ((s.symtype() == SymTypeParam || s.symtype() == SymTypeOutputParam) - && !s.lockgeom()) { + && s.interpolated()) { UserDataNeeded udn(s.name(), layer, s.typespec().simpletype(), s.data(), s.has_derivs()); std::set::iterator found; @@ -3274,6 +3277,21 @@ RuntimeOptimizer::run() m_userdata_needed.insert(udn); } } + // Find interactive parameters + if (s.symtype() == SymTypeParam && s.interactive()) { + // Make enough room in interactive_data to accommodate this + // interactive parameter, including correct alignment. + size_t offset = interactive_data.size(); + size_t typesize = s.typespec().simpletype().size(); + size_t alignment = typesize > 4 ? 8 : 4; + offset = OIIO::round_to_multiple_of_pow2(offset, alignment); + interactive_data.resize(offset + typesize); + // Copy from the instance value to the interactive block + memcpy(&interactive_data[offset], s.data(), typesize); + // Make sure the symbol remembers it's stored in the interactive + // arena with the right offset. + group().add_interactive_param(layer, s.name(), offset); + } // Track which globals the group needs if (s.symtype() == SymTypeGlobal) { m_globals_needed.insert(s.name()); @@ -3377,6 +3395,7 @@ RuntimeOptimizer::run() } } group().does_nothing(does_nothing); + group().setup_interactive_arena(interactive_data); m_stat_specialization_time = rop_timer(); { diff --git a/src/liboslexec/shadingsys.cpp b/src/liboslexec/shadingsys.cpp index b22e88821..6199d51fb 100644 --- a/src/liboslexec/shadingsys.cpp +++ b/src/liboslexec/shadingsys.cpp @@ -943,6 +943,8 @@ ShadingSystem::convert_value(void* dst, TypeDesc dsttype, const void* src, return false; // Unsupported conversion } + + void register_JIT_Global(const char* global_var_name, void* global_var_addr) { @@ -1025,16 +1027,13 @@ ShadingSystemImpl::ShadingSystemImpl(RendererServices* renderer, , m_opt_texture_handle(true) , m_opt_seed_bblock_aliases(true) , m_opt_useparam(false) - , #if OSL_USE_BATCHED - m_opt_batched_analysis((renderer->batched(WidthOf<16>()) != nullptr) - || (renderer->batched(WidthOf<8>()) != nullptr)) - , + , m_opt_batched_analysis((renderer->batched(WidthOf<16>()) != nullptr) + || (renderer->batched(WidthOf<8>()) != nullptr)) #else - m_opt_batched_analysis(false) - , + , m_opt_batched_analysis(false) #endif - m_llvm_jit_fma(false) + , m_llvm_jit_fma(false) , m_llvm_jit_aggressive(false) , m_optimize_nondebug(false) , m_vector_width(4) @@ -2018,6 +2017,14 @@ ShadingSystemImpl::getattribute(ShaderGroup* group, string_view name, *(std::string*)val = exists ? group->m_llvm_ptx_compiled_version : ""; return true; } + if (name == "interactive_params" && type.basetype == TypeDesc::PTR) { + *(void**)val = group->m_interactive_arena.get(); + return true; + } + if (name == "device_interactive_params" && type.basetype == TypeDesc::PTR) { + *(void**)val = group->m_device_interactive_arena.d_get(); + return true; + } // All the remaining attributes require the group to already be // optimized. @@ -2586,7 +2593,7 @@ ShadingSystemImpl::Parameter(ShaderGroup& group, string_view name, TypeDesc t, ShaderGroupRef ShadingSystemImpl::ShaderGroupBegin(string_view groupname) { - ShaderGroupRef group(new ShaderGroup(groupname)); + ShaderGroupRef group(new ShaderGroup(groupname, *this)); group->m_exec_repeat = m_exec_repeat; { // Record the group in the SS's census of all extant groups @@ -3193,10 +3200,12 @@ ShadingSystemImpl::ReParameter(ShaderGroup& group, string_view layername_, { // Find the named layer ustring layername(layername_); - ShaderInstance* layer = NULL; + ShaderInstance* layer = nullptr; + int layerindex = -1; for (int i = 0, e = group.nlayers(); i < e; ++i) { if (group[i]->layername() == layername) { - layer = group[i]; + layer = group[i]; + layerindex = i; break; } } @@ -3216,6 +3225,14 @@ ShadingSystemImpl::ReParameter(ShaderGroup& group, string_view layername_, return false; } + // Check that it's declared to be an interactive parameter + if (!sym->interactive()) { + errorfmt( + "ReParameter cannot adjust {}.{}, which was not declared interactive", + layername, paramname); + return false; + } + // Check for mismatch versus previously-declared type if ((relaxed_param_typecheck() && !relaxed_equivalent(sym->typespec(), type)) || (!relaxed_param_typecheck() @@ -3228,7 +3245,13 @@ ShadingSystemImpl::ReParameter(ShaderGroup& group, string_view layername_, return false; // Do the deed - memcpy(sym->data(), val, type.size()); + int offset = group.interactive_param_offset(layerindex, sym->name()); + memcpy(group.interactive_arena_ptr() + offset, val, type.size()); + if (use_optix()) { + renderer()->copy_to_device(group.device_interactive_arena().d_get() + + offset, + val, type.size()); + } return true; } diff --git a/src/testrender/cuda/wrapper.cu b/src/testrender/cuda/wrapper.cu index 274a4d7ce..19bfd6b3a 100644 --- a/src/testrender/cuda/wrapper.cu +++ b/src/testrender/cuda/wrapper.cu @@ -183,14 +183,19 @@ __closesthit__closest_hit_osl() sg.context = &shading_context; // Run the OSL group and init functions + auto sbtdata = reinterpret_cast(optixGetSbtDataPointer()); const unsigned int shaderInitOpIdx = 2u + 2u * sg.shaderID + 0u; const unsigned int shaderGroupIdx = 2u + 2u * sg.shaderID + 1u; - optixDirectCall( - shaderInitOpIdx, &sg, params, nullptr, nullptr, - 0); // call osl_init_func - optixDirectCall( - shaderGroupIdx, &sg, params, nullptr, nullptr, - 0); // call osl_group_func + // call osl_init_func + optixDirectCall( + shaderInitOpIdx, &sg /*shaderglobals_ptr*/, params /*groupdata_ptr*/, + nullptr /*userdata_base_ptr*/, nullptr /*output_base_ptr*/, + 0 /*shadeindex - unused*/, sbtdata->data /*interactive_params_ptr*/); + // call osl_group_func + optixDirectCall( + shaderGroupIdx, &sg /*shaderglobals_ptr*/, params /*groupdata_ptr*/, + nullptr /*userdata_base_ptr*/, nullptr /*output_base_ptr*/, + 0 /*shadeindex - unused*/, sbtdata->data /*interactive_params_ptr*/); float3 result = process_closure((OSL::ClosureColor*)sg.Ci); uint3 launch_dims = optixGetLaunchDimensions(); diff --git a/src/testrender/optixraytracer.cpp b/src/testrender/optixraytracer.cpp index f8ff9b635..e9f3565e9 100644 --- a/src/testrender/optixraytracer.cpp +++ b/src/testrender/optixraytracer.cpp @@ -111,29 +111,42 @@ OptixRaytracer::~OptixRaytracer() -uint64_t -OptixRaytracer::register_global(const std::string& str, uint64_t value) +void* +OptixRaytracer::device_alloc(size_t size) { - auto it = m_globals_map.find(ustring(str)); - if (it != m_globals_map.end()) { - return it->second; + void* ptr = nullptr; + cudaError_t res = cudaMalloc(reinterpret_cast(&ptr), size); + if (res != cudaSuccess) { + errhandler().errorfmt("cudaMalloc({}) failed with error: {}\n", size, + cudaGetErrorString(res)); } - m_globals_map[ustring(str)] = value; - return value; + return ptr; } - -bool -OptixRaytracer::fetch_global(const std::string& str, uint64_t* value) +void +OptixRaytracer::device_free(void* ptr) { - auto it = m_globals_map.find(ustring(str)); + cudaError_t res = cudaFree(ptr); + if (res != cudaSuccess) { + errhandler().errorfmt("cudaFree() failed with error: {}\n", + cudaGetErrorString(res)); + } +} + - if (it != m_globals_map.end()) { - *value = it->second; - return true; +void* +OptixRaytracer::copy_to_device(void* dst_device, const void* src_host, + size_t size) +{ + cudaError_t res = cudaMemcpy(dst_device, src_host, size, + cudaMemcpyHostToDevice); + if (res != cudaSuccess) { + errhandler().errorfmt( + "cudaMemcpy host->device of size {} failed with error: {}\n", size, + cudaGetErrorString(res)); } - return false; + return dst_device; } @@ -449,6 +462,7 @@ OptixRaytracer::make_optix_materials() // Create materials int mtl_id = 0; + std::vector material_interactive_params; for (const auto& groupref : shaders()) { std::string group_name, init_name, entry_name; shadingsys->getattribute(groupref.get(), "groupname", group_name); @@ -474,7 +488,6 @@ OptixRaytracer::make_optix_materials() std::string osl_ptx; shadingsys->getattribute(groupref.get(), "ptx_compiled_version", OSL::TypeDesc::PTR, &osl_ptx); - if (osl_ptx.empty()) { errhandler().errorfmt("Failed to generate PTX for ShaderGroup {}", group_name); @@ -486,6 +499,11 @@ OptixRaytracer::make_optix_materials() OIIO::Filesystem::write_text_file(filename, osl_ptx); } + void* interactive_params = nullptr; + shadingsys->getattribute(groupref.get(), "device_interactive_params", + TypeDesc::PTR, &interactive_params); + material_interactive_params.push_back(interactive_params); + OptixModule optix_module; // Create Programs from the init and group_entry functions, @@ -544,6 +562,7 @@ OptixRaytracer::make_optix_materials() final_groups.push_back(sphere_fillSG_dc); // append the shader groups to our "official" list of program groups + // size_t shader_groups_start_index = final_groups.size(); final_groups.insert(final_groups.end(), shader_groups.begin(), shader_groups.end()); @@ -623,6 +642,13 @@ OptixRaytracer::make_optix_materials() sbt_records[sbtIndex++].data = reinterpret_cast(d_quads_list); sbt_records[sbtIndex++].data = reinterpret_cast(d_spheres_list); + // Fill in the data pointer for all the osl callables, starting at + // sbtIndex and 2 for each of the materials. + for (size_t i = 0, e = material_interactive_params.size(); i < e; ++i) { + sbt_records[sbtIndex + 2 * i].data = material_interactive_params[i]; + sbt_records[sbtIndex + 2 * i + 1].data = material_interactive_params[i]; + } + const int nshaders = int(shader_groups.size()); const int nhitgroups = (scene.quads.size() > 0) + (scene.spheres.size() > 0); diff --git a/src/testrender/optixraytracer.h b/src/testrender/optixraytracer.h index bc9928fa9..e63ade503 100644 --- a/src/testrender/optixraytracer.h +++ b/src/testrender/optixraytracer.h @@ -25,9 +25,6 @@ class OptixRaytracer final : public SimpleRaytracer { OptixRaytracer(); virtual ~OptixRaytracer(); - uint64_t register_global(const std::string& str, uint64_t value) override; - bool fetch_global(const std::string& str, uint64_t* value) override; - int supports(string_view feature) const override { if (feature == "OptiX") @@ -65,6 +62,11 @@ class OptixRaytracer final : public SimpleRaytracer { void processPrintfBuffer(void* buffer_data, size_t buffer_size); + virtual void* device_alloc(size_t size) override; + virtual void device_free(void* ptr) override; + virtual void* copy_to_device(void* dst_device, const void* src_host, + size_t size) override; + private: optix::Context m_optix_ctx = nullptr; @@ -95,7 +97,6 @@ class OptixRaytracer final : public SimpleRaytracer { std::string m_materials_ptx; std::unordered_map m_samplers; - std::unordered_map m_globals_map; }; diff --git a/src/testrender/render_params.h b/src/testrender/render_params.h index 420b4e161..bf375c2e5 100644 --- a/src/testrender/render_params.h +++ b/src/testrender/render_params.h @@ -1,3 +1,7 @@ +// Copyright Contributors to the Open Shading Language project. +// SPDX-License-Identifier: BSD-3-Clause +// https://github.com/AcademySoftwareFoundation/OpenShadingLanguage + #pragma once @@ -63,6 +67,12 @@ struct QuadParams : PrimitiveParams { struct GenericData { + // For geometry hit callables, data is the pointer to the array of + // primitive params for that primitive type, and sbtGeoIndex is the index + // for this primitive. + // + // For shader/material callables, data points to the interactive parameter + // data arena for that material. void* data; unsigned int sbtGeoIndex; }; @@ -72,7 +82,7 @@ struct GenericData { struct GenericRecord { __align__( OPTIX_SBT_RECORD_ALIGNMENT) char header[OPTIX_SBT_RECORD_HEADER_SIZE]; - + // What follows should duplicate GenericData void* data; unsigned int sbtGeoIndex; }; diff --git a/src/testshade/cuda/optix_grid_renderer.cu b/src/testshade/cuda/optix_grid_renderer.cu index 0b9a97f07..d4df4fb8c 100644 --- a/src/testshade/cuda/optix_grid_renderer.cu +++ b/src/testshade/cuda/optix_grid_renderer.cu @@ -66,7 +66,7 @@ __raygen__() uint3 launch_dims = optixGetLaunchDimensions(); uint3 launch_index = optixGetLaunchIndex(); - void* p = reinterpret_cast(optixGetSbtDataPointer()); + auto sbtdata = reinterpret_cast(optixGetSbtDataPointer()); // Compute the pixel coordinates float2 d = make_float2(static_cast(launch_index.x) + 0.5f, @@ -126,10 +126,17 @@ __raygen__() sg.renderstate = &closure_pool[0]; // Run the OSL group and init functions - optixDirectCall( - 0u, &sg, params, nullptr, nullptr, 0); // call osl_init_func - optixDirectCall( - 1u, &sg, params, nullptr, nullptr, 0); // call osl_group_func + + // call osl_init_func + optixDirectCall( + 0u, &sg /*shaderglobals_ptr*/, params /*groupdata_ptr*/, + nullptr /*userdata_base_ptr*/, nullptr /*output_base_ptr*/, + 0 /*shadeindex - unused*/, sbtdata->data /*interactive_params_ptr*/); + // call osl_group_func + optixDirectCall( + 1u, &sg /*shaderglobals_ptr*/, params /*groupdata_ptr*/, + nullptr /*userdata_base_ptr*/, nullptr /*output_base_ptr*/, + 0 /*shadeindex - unused*/, sbtdata->data /*interactive_params_ptr*/); float* f_output = (float*)params; int pixel = launch_index.y * launch_dims.x + launch_index.x; diff --git a/src/testshade/optixgridrender.cpp b/src/testshade/optixgridrender.cpp index e57b14eb1..42101c232 100644 --- a/src/testshade/optixgridrender.cpp +++ b/src/testshade/optixgridrender.cpp @@ -105,30 +105,42 @@ OptixGridRenderer::OptixGridRenderer() -uint64_t -OptixGridRenderer::register_global(const std::string& str, uint64_t value) +void* +OptixGridRenderer::device_alloc(size_t size) { - auto it = m_globals_map.find(ustring(str)); - - if (it != m_globals_map.end()) { - return it->second; + void* ptr = nullptr; + cudaError_t res = cudaMalloc(reinterpret_cast(&ptr), size); + if (res != cudaSuccess) { + errhandler().errorfmt("cudaMalloc({}) failed with error: {}\n", size, + cudaGetErrorString(res)); } - m_globals_map[ustring(str)] = value; - return value; + return ptr; } - -bool -OptixGridRenderer::fetch_global(const std::string& str, uint64_t* value) +void +OptixGridRenderer::device_free(void* ptr) { - auto it = m_globals_map.find(ustring(str)); + cudaError_t res = cudaFree(ptr); + if (res != cudaSuccess) { + errhandler().errorfmt("cudaFree() failed with error: {}\n", + cudaGetErrorString(res)); + } +} - if (it != m_globals_map.end()) { - *value = it->second; - return true; + +void* +OptixGridRenderer::copy_to_device(void* dst_device, const void* src_host, + size_t size) +{ + cudaError_t res = cudaMemcpy(dst_device, src_host, size, + cudaMemcpyHostToDevice); + if (res != cudaSuccess) { + errhandler().errorfmt( + "cudaMemcpy host->device of size {} failed with error: {}\n", size, + cudaGetErrorString(res)); } - return false; + return dst_device; } @@ -322,6 +334,7 @@ OptixGridRenderer::make_optix_materials() OptixProgramGroupOptions program_options = {}; std::vector program_groups; + std::vector material_interactive_params; // Raygen group OptixProgramGroupDesc raygen_desc = {}; @@ -510,6 +523,10 @@ OptixGridRenderer::make_optix_materials() pgDesc[1].callables.entryFunctionNameCC = nullptr; program_groups.resize(program_groups.size() + 2); + void* interactive_params = nullptr; + shadingsys->getattribute(groupref.get(), "device_interactive_params", + TypeDesc::PTR, &interactive_params); + material_interactive_params.push_back(interactive_params); sizeof_msg_log = sizeof(msg_log); OPTIX_CHECK_MSG( @@ -576,8 +593,8 @@ OptixGridRenderer::make_optix_materials() CUdeviceptr d_setglobals_raygenRecord; CUdeviceptr d_setglobals_missRecord; - EmptyRecord raygenRecord, missRecord, hitgroupRecord, callablesRecord[2]; - EmptyRecord setglobals_raygenRecord, setglobals_missRecord; + GenericRecord raygenRecord, missRecord, hitgroupRecord, callablesRecord[2]; + GenericRecord setglobals_raygenRecord, setglobals_missRecord; OPTIX_CHECK(optixSbtRecordPackHeader(raygen_group, &raygenRecord)); OPTIX_CHECK(optixSbtRecordPackHeader(miss_group, &missRecord)); @@ -591,26 +608,26 @@ OptixGridRenderer::make_optix_materials() OPTIX_CHECK(optixSbtRecordPackHeader(setglobals_miss_group, &setglobals_missRecord)); - raygenRecord.data = reinterpret_cast(5); + raygenRecord.data = material_interactive_params[0]; missRecord.data = nullptr; hitgroupRecord.data = nullptr; - callablesRecord[0].data = reinterpret_cast(1); - callablesRecord[1].data = reinterpret_cast(2); + callablesRecord[0].data = material_interactive_params[0]; + callablesRecord[1].data = material_interactive_params[0]; setglobals_raygenRecord.data = nullptr; setglobals_missRecord.data = nullptr; CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_raygenRecord), - sizeof(EmptyRecord))); + sizeof(GenericRecord))); CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_missRecord), - sizeof(EmptyRecord))); + sizeof(GenericRecord))); CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_hitgroupRecord), - sizeof(EmptyRecord))); + sizeof(GenericRecord))); CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_callablesRecord), - 2 * sizeof(EmptyRecord))); + 2 * sizeof(GenericRecord))); CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_setglobals_raygenRecord), - sizeof(EmptyRecord))); + sizeof(GenericRecord))); CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_setglobals_missRecord), - sizeof(EmptyRecord))); + sizeof(GenericRecord))); m_ptrs_to_free.push_back(reinterpret_cast(d_raygenRecord)); m_ptrs_to_free.push_back(reinterpret_cast(d_missRecord)); @@ -621,40 +638,40 @@ OptixGridRenderer::make_optix_materials() m_ptrs_to_free.push_back(reinterpret_cast(d_setglobals_missRecord)); CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_raygenRecord), - &raygenRecord, sizeof(EmptyRecord), + &raygenRecord, sizeof(GenericRecord), cudaMemcpyHostToDevice)); CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_missRecord), &missRecord, - sizeof(EmptyRecord), cudaMemcpyHostToDevice)); + sizeof(GenericRecord), cudaMemcpyHostToDevice)); CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_hitgroupRecord), - &hitgroupRecord, sizeof(EmptyRecord), + &hitgroupRecord, sizeof(GenericRecord), cudaMemcpyHostToDevice)); CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_callablesRecord), - &callablesRecord[0], 2 * sizeof(EmptyRecord), + &callablesRecord[0], 2 * sizeof(GenericRecord), cudaMemcpyHostToDevice)); CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_setglobals_raygenRecord), - &setglobals_raygenRecord, sizeof(EmptyRecord), + &setglobals_raygenRecord, sizeof(GenericRecord), cudaMemcpyHostToDevice)); CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_setglobals_missRecord), - &setglobals_missRecord, sizeof(EmptyRecord), + &setglobals_missRecord, sizeof(GenericRecord), cudaMemcpyHostToDevice)); // Looks like OptixShadingTable needs to be filled out completely m_optix_sbt.raygenRecord = d_raygenRecord; m_optix_sbt.missRecordBase = d_missRecord; - m_optix_sbt.missRecordStrideInBytes = sizeof(EmptyRecord); + m_optix_sbt.missRecordStrideInBytes = sizeof(GenericRecord); m_optix_sbt.missRecordCount = 1; m_optix_sbt.hitgroupRecordBase = d_hitgroupRecord; - m_optix_sbt.hitgroupRecordStrideInBytes = sizeof(EmptyRecord); + m_optix_sbt.hitgroupRecordStrideInBytes = sizeof(GenericRecord); m_optix_sbt.hitgroupRecordCount = 1; m_optix_sbt.callablesRecordBase = d_callablesRecord; - m_optix_sbt.callablesRecordStrideInBytes = sizeof(EmptyRecord); + m_optix_sbt.callablesRecordStrideInBytes = sizeof(GenericRecord); m_optix_sbt.callablesRecordCount = 2; // Shader binding table for SetGlobals stage m_setglobals_optix_sbt = {}; m_setglobals_optix_sbt.raygenRecord = d_setglobals_raygenRecord; m_setglobals_optix_sbt.missRecordBase = d_setglobals_missRecord; - m_setglobals_optix_sbt.missRecordStrideInBytes = sizeof(EmptyRecord); + m_setglobals_optix_sbt.missRecordStrideInBytes = sizeof(GenericRecord); m_setglobals_optix_sbt.missRecordCount = 1; return true; } diff --git a/src/testshade/optixgridrender.h b/src/testshade/optixgridrender.h index 07a6a4852..b60a278c9 100644 --- a/src/testshade/optixgridrender.h +++ b/src/testshade/optixgridrender.h @@ -25,9 +25,6 @@ class OptixGridRenderer final : public SimpleRenderer { OptixGridRenderer(); virtual ~OptixGridRenderer(); - uint64_t register_global(const std::string& str, uint64_t value) override; - bool fetch_global(const std::string& str, uint64_t* value) override; - int supports(string_view feature) const override { if (feature == "OptiX") @@ -70,6 +67,11 @@ class OptixGridRenderer final : public SimpleRenderer { void processPrintfBuffer(void* buffer_data, size_t buffer_size); + virtual void* device_alloc(size_t size) override; + virtual void device_free(void* ptr) override; + virtual void* copy_to_device(void* dst_device, const void* src_host, + size_t size) override; + private: optix::Context m_optix_ctx = nullptr; @@ -92,7 +94,6 @@ class OptixGridRenderer final : public SimpleRenderer { std::string m_materials_ptx; std::unordered_map m_samplers; - std::unordered_map m_globals_map; OSL::Matrix44 m_shader2common; // "shader" space to "common" space matrix OSL::Matrix44 m_object2common; // "object" space to "common" space matrix @@ -103,10 +104,4 @@ class OptixGridRenderer final : public SimpleRenderer { -struct EmptyRecord { - __align__( - OPTIX_SBT_RECORD_ALIGNMENT) char header[OPTIX_SBT_RECORD_HEADER_SIZE]; - void* data; -}; - OSL_NAMESPACE_EXIT diff --git a/src/testshade/render_params.h b/src/testshade/render_params.h index 1956f89de..e62c19e5d 100644 --- a/src/testshade/render_params.h +++ b/src/testshade/render_params.h @@ -1,6 +1,9 @@ +// Copyright Contributors to the Open Shading Language project. +// SPDX-License-Identifier: BSD-3-Clause +// https://github.com/AcademySoftwareFoundation/OpenShadingLanguage + #pragma once -#if (OPTIX_VERSION >= 70000) struct RenderParams { float invw; float invh; @@ -21,4 +24,18 @@ struct RenderParams { uint64_t test_str_1; uint64_t test_str_2; }; -#endif + + + +struct GenericData { + // For shader/material callables, data points to the interactive parameter + // data arena for that material. + void* data; +}; + +struct GenericRecord { + __align__( + OPTIX_SBT_RECORD_ALIGNMENT) char header[OPTIX_SBT_RECORD_HEADER_SIZE]; + // What follows should duplicate GenericData + void* data; +}; diff --git a/src/testshade/testshade.cpp b/src/testshade/testshade.cpp index 8af2b1c80..b7fb6589c 100644 --- a/src/testshade/testshade.cpp +++ b/src/testshade/testshade.cpp @@ -429,20 +429,30 @@ add_param(ParamValueList& params, string_view command, string_view paramname, ParamHints hint = ParamHints::none; float f[16]; - size_t pos; - while ((pos = command.find_first_of(":")) != std::string::npos) { + // Dissect optional modifiers from a command that might look like + // "--param:type=float:interactive=1" + size_t colonpos = command.find(':'); + if (colonpos != std::string::npos) { using namespace OIIO; - command = command.substr(pos + 1, std::string::npos); - auto splits = Strutil::splitsv(command, ":", 1); - if (splits.size() < 1) { - } else if (Strutil::istarts_with(splits[0], "type=")) - type.fromstring(splits[0].c_str() + 5); - else if (Strutil::istarts_with(splits[0], "lockgeom=")) - set(hint, ParamHints::interpolated, !Strutil::stoi(splits[0])); - else if (Strutil::istarts_with(splits[0], "interpolated=")) - set(hint, ParamHints::interpolated, Strutil::stoi(splits[0])); - else if (Strutil::istarts_with(splits[0], "interactive=")) - set(hint, ParamHints::interactive, Strutil::stoi(splits[0])); + // lob off the command and colon + command = command.substr(colonpos + 1); + auto options = Strutil::splitsv(command, ":"); + for (auto&& opt : options) { + // Each option should look like "foo=bar", split at the '=' + auto parts = Strutil::splitsv(opt, "="); + if (parts.size() == 2) { + if (parts[0] == "type") + type.fromstring(parts[1]); + else if (parts[0] == "lockgeom") + set(hint, ParamHints::interpolated, + !Strutil::stoi(parts[1])); + else if (parts[0] == "interpolated") + set(hint, ParamHints::interpolated, + Strutil::stoi(parts[1])); + else if (parts[0] == "interactive") + set(hint, ParamHints::interactive, Strutil::stoi(parts[1])); + } + } } // If it is or might be a matrix, look for 16 comma-separated floats @@ -2095,9 +2105,8 @@ test_shade(int argc, const char* argv[]) if (reparams.size() && reparam_layer.size() && (iter + 1 < iters)) { for (size_t p = 0; p < reparams.size(); ++p) { const ParamValue& pv(reparams[p]); - shadingsys->ReParameter(*shadergroup, reparam_layer.c_str(), - pv.name().c_str(), pv.type(), - pv.data()); + shadingsys->ReParameter(*shadergroup, reparam_layer, pv.name(), + pv.type(), pv.data()); } } } diff --git a/testsuite/example-cuda/cuda_grid_renderer.cpp b/testsuite/example-cuda/cuda_grid_renderer.cpp index d34c99793..5cd7ddce6 100644 --- a/testsuite/example-cuda/cuda_grid_renderer.cpp +++ b/testsuite/example-cuda/cuda_grid_renderer.cpp @@ -29,30 +29,46 @@ static ustring u_perspective("perspective"); static ustring u_s("s"), u_t("t"); -uint64_t -CudaGridRenderer::register_global(const std::string& str, uint64_t value) +void* +CudaGridRenderer::device_alloc(size_t size) { - auto it = _globals_map.find(ustring(str)); - - if (it != _globals_map.end()) { - return it->second; + void* ptr = nullptr; + cudaError_t res = cudaMalloc(reinterpret_cast(&ptr), size); + if (res != cudaSuccess) { + errhandler().errorfmt("cudaMalloc({}) failed with error: {}\n", size, + cudaGetErrorString(res)); } - _globals_map[ustring(str)] = value; - return value; + return ptr; } -bool -CudaGridRenderer::fetch_global(const std::string& str, uint64_t* value) + +void +CudaGridRenderer::device_free(void* ptr) { - auto it = _globals_map.find(ustring(str)); + cudaError_t res = cudaFree(ptr); + if (res != cudaSuccess) { + errhandler().errorfmt("cudaFree() failed with error: {}\n", + cudaGetErrorString(res)); + } +} - if (it != _globals_map.end()) { - *value = it->second; - return true; + +void* +CudaGridRenderer::copy_to_device(void* dst_device, const void* src_host, + size_t size) +{ + cudaError_t res = cudaMemcpy(dst_device, src_host, size, + cudaMemcpyHostToDevice); + if (res != cudaSuccess) { + errhandler().errorfmt( + "cudaMemcpy host->device of size {} failed with error: {}\n", size, + cudaGetErrorString(res)); } - return false; + return dst_device; } + + /// Return true if the texture handle (previously returned by /// get_texture_handle()) is a valid texture that can be subsequently /// read or sampled. diff --git a/testsuite/example-cuda/cuda_grid_renderer.h b/testsuite/example-cuda/cuda_grid_renderer.h index 64ea0d4dc..ff1a969ec 100644 --- a/testsuite/example-cuda/cuda_grid_renderer.h +++ b/testsuite/example-cuda/cuda_grid_renderer.h @@ -10,7 +10,6 @@ #include -using GlobalsMap = std::unordered_map; using TextureSamplerMap = std::unordered_map; // Just use 4x4 matrix for transformations @@ -19,7 +18,6 @@ typedef std::map> TransformMap class CudaGridRenderer final : public OSL::RendererServices { TextureSamplerMap _samplers; - GlobalsMap _globals_map; // Named transforms TransformMap _named_xforms; @@ -35,11 +33,6 @@ class CudaGridRenderer final : public OSL::RendererServices { CudaGridRenderer() {} virtual ~CudaGridRenderer() {} - uint64_t register_global(const std::string& str, uint64_t value); - bool fetch_global(const std::string& str, uint64_t* value); - - const GlobalsMap& globals_map() const { return _globals_map; } - virtual int supports(OIIO::string_view feature) const { if (feature == "OptiX") { @@ -72,4 +65,9 @@ class CudaGridRenderer final : public OSL::RendererServices { ustringhash to, float time); void name_transform(const char* name, const Transformation& xform); + + virtual void* device_alloc(size_t size) override; + virtual void device_free(void* ptr) override; + virtual void* copy_to_device(void* dst_device, const void* src_host, + size_t size) override; }; diff --git a/testsuite/example-cuda/example-cuda.cpp b/testsuite/example-cuda/example-cuda.cpp index 10a04b186..9bc4d924e 100644 --- a/testsuite/example-cuda/example-cuda.cpp +++ b/testsuite/example-cuda/example-cuda.cpp @@ -410,28 +410,6 @@ build_string_table_ptx(const CudaGridRenderer& rs) std::stringstream strlib_ss; - strlib_ss << "// so things name-mangle properly\n"; - strlib_ss << "struct DeviceString {\n"; - strlib_ss << " const char* m_chars;\n"; - strlib_ss << "};\n"; - - // write out all the global strings - for (auto&& gvar : rs.globals_map()) { - // std::cout << "global: " << gvar.first << " -> " << gvar.second - // << std::endl; - std::vector var_ns = extractNamespaces(gvar.first); - - // build namespace - for (size_t i = 0; i < var_ns.size() - 1; i++) - strlib_ss << "namespace " << var_ns[i] << " {\n"; - - strlib_ss << "__device__ DeviceString " << var_ns.back() - << " = { (const char *)" << gvar.second << "};\n"; - // close namespace up - for (size_t i = 0; i < var_ns.size() - 1; i++) - strlib_ss << "}\n"; - } - strlib_ss << "\n"; strlib_ss << "extern \"C\" __global__ void " "__direct_callable__strlib_dummy(int *j)\n"; diff --git a/testsuite/reparam-arrays/BATCHED b/testsuite/reparam-arrays/BATCHED new file mode 100644 index 000000000..e69de29bb diff --git a/testsuite/reparam/ref/out.tif b/testsuite/reparam-arrays/ref/out.tif similarity index 100% rename from testsuite/reparam/ref/out.tif rename to testsuite/reparam-arrays/ref/out.tif diff --git a/testsuite/reparam-arrays/ref/out.txt b/testsuite/reparam-arrays/ref/out.txt new file mode 100644 index 000000000..ecfae2f54 --- /dev/null +++ b/testsuite/reparam-arrays/ref/out.txt @@ -0,0 +1,13 @@ +Compiled test.osl -> test.oso +Compiled test_array.osl -> test_array.oso +Compiled test_colors.osl -> test_colors.oso + +Output Cout to out.tif + +Output Cout to out_array_iter0.tif + +Output Cout to out_array_iter1.tif + +Output Cout to out_colors_iter0.tif + +Output Cout to out_colors_iter1.tif diff --git a/testsuite/reparam/ref/out_array_iter0.tif b/testsuite/reparam-arrays/ref/out_array_iter0.tif similarity index 100% rename from testsuite/reparam/ref/out_array_iter0.tif rename to testsuite/reparam-arrays/ref/out_array_iter0.tif diff --git a/testsuite/reparam/ref/out_array_iter1.tif b/testsuite/reparam-arrays/ref/out_array_iter1.tif similarity index 100% rename from testsuite/reparam/ref/out_array_iter1.tif rename to testsuite/reparam-arrays/ref/out_array_iter1.tif diff --git a/testsuite/reparam/ref/out_colors_iter0.tif b/testsuite/reparam-arrays/ref/out_colors_iter0.tif similarity index 100% rename from testsuite/reparam/ref/out_colors_iter0.tif rename to testsuite/reparam-arrays/ref/out_colors_iter0.tif diff --git a/testsuite/reparam/ref/out_colors_iter1.tif b/testsuite/reparam-arrays/ref/out_colors_iter1.tif similarity index 100% rename from testsuite/reparam/ref/out_colors_iter1.tif rename to testsuite/reparam-arrays/ref/out_colors_iter1.tif diff --git a/testsuite/reparam-arrays/run.py b/testsuite/reparam-arrays/run.py new file mode 100755 index 000000000..575ce1e5a --- /dev/null +++ b/testsuite/reparam-arrays/run.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python + +# Copyright Contributors to the Open Shading Language project. +# SPDX-License-Identifier: BSD-3-Clause +# https://github.com/AcademySoftwareFoundation/OpenShadingLanguage + +command += testshade ("-g 128 128 --layer testlay -param:interactive=1 scale 5.0 test -iters 2 -reparam testlay scale 15.0 -od uint8 -o Cout out.tif") + +command += testshade ("-g 128 128 --layer testlay -param:type=float[2] scale 5.0,2.0 test_array -od uint8 -o Cout out_array_iter0.tif") +command += testshade ("-g 128 128 --layer testlay -param:type=float[2]:interactive=1 scale 5.0,2.0 test_array -iters 2 -reparam:type=float[2] testlay scale 15.0,30.0 -od uint8 -o Cout out_array_iter1.tif") + +command += testshade ("-g 128 128 --layer testlay -param:type=color[2] colors 5,5,5,0.5,0.5,0.5 test_colors -od uint8 -o Cout out_colors_iter0.tif") +command += testshade ("-g 128 128 --layer testlay -param:type=color[2]:interactive=1 colors 5,5,5,0.5,0.5,0.5 test_colors -iters 2 -reparam:type=color[2] testlay colors 20,20,20,1.0,0.75,0.25 -od uint8 -o Cout out_colors_iter1.tif") + +outputs = [ "out.txt", "out.tif", "out_array_iter0.tif", "out_array_iter1.tif", "out_colors_iter0.tif", "out_colors_iter1.tif" ] + +# expect a few LSB failures +failthresh = 0.004 +failpercent = 0.05 diff --git a/testsuite/reparam-arrays/test.osl b/testsuite/reparam-arrays/test.osl new file mode 100644 index 000000000..b97e38ba2 --- /dev/null +++ b/testsuite/reparam-arrays/test.osl @@ -0,0 +1,10 @@ +// Copyright Contributors to the Open Shading Language project. +// SPDX-License-Identifier: BSD-3-Clause +// https://github.com/AcademySoftwareFoundation/OpenShadingLanguage + +shader +test (float scale = 20, + output color Cout = 0) +{ + Cout = (float) noise(u*scale, v*scale); +} diff --git a/testsuite/reparam/test_array.osl b/testsuite/reparam-arrays/test_array.osl similarity index 80% rename from testsuite/reparam/test_array.osl rename to testsuite/reparam-arrays/test_array.osl index 72c76e8f6..340b1baa6 100644 --- a/testsuite/reparam/test_array.osl +++ b/testsuite/reparam-arrays/test_array.osl @@ -3,7 +3,7 @@ // https://github.com/AcademySoftwareFoundation/OpenShadingLanguage shader -test_array (float scale[2] = {0,0} [[ int lockgeom = 0 ]], +test_array (float scale[2] = {0,0} [[ int interactive = 1 ]], output color Cout = 0) { Cout = (float) noise(u*scale[0], v*scale[1]); diff --git a/testsuite/reparam/test_colors.osl b/testsuite/reparam-arrays/test_colors.osl similarity index 78% rename from testsuite/reparam/test_colors.osl rename to testsuite/reparam-arrays/test_colors.osl index 87a73e4fe..2adc92731 100644 --- a/testsuite/reparam/test_colors.osl +++ b/testsuite/reparam-arrays/test_colors.osl @@ -3,7 +3,7 @@ // https://github.com/AcademySoftwareFoundation/OpenShadingLanguage shader -test_colors (color colors[2] = {{0,0,0},{0,0,0}} [[ int lockgeom = 0 ]], +test_colors (color colors[2] = {{0,0,0},{0,0,0}} [[ int interactive = 1 ]], output color Cout = 0) { point in = P*colors[0]; diff --git a/testsuite/reparam/OPTIX b/testsuite/reparam/OPTIX new file mode 100644 index 000000000..e69de29bb diff --git a/testsuite/reparam/ref/out.txt b/testsuite/reparam/ref/out.txt index ecfae2f54..015788bc3 100644 --- a/testsuite/reparam/ref/out.txt +++ b/testsuite/reparam/ref/out.txt @@ -1,13 +1,8 @@ Compiled test.osl -> test.oso -Compiled test_array.osl -> test_array.oso -Compiled test_colors.osl -> test_colors.oso +test: f = 1 +test: user = 2 +test: third = 3 +test: f = 10 +test: user = 2 +test: third = 30 -Output Cout to out.tif - -Output Cout to out_array_iter0.tif - -Output Cout to out_array_iter1.tif - -Output Cout to out_colors_iter0.tif - -Output Cout to out_colors_iter1.tif diff --git a/testsuite/reparam/run.py b/testsuite/reparam/run.py index 938b9373c..7c3bc74ee 100755 --- a/testsuite/reparam/run.py +++ b/testsuite/reparam/run.py @@ -4,16 +4,7 @@ # SPDX-License-Identifier: BSD-3-Clause # https://github.com/AcademySoftwareFoundation/OpenShadingLanguage -command += testshade ("-g 128 128 --layer testlay -param:lockgeom=0 scale 5.0 test -iters 2 -reparam testlay scale 15.0 -od uint8 -o Cout out.tif") +command += testshade ("--layer lay0 --param:type=float:interactive=1 f 1 --param:type=float:interactive=1 user 2 --param:type=float:interactive=1 third 3 test --iters 2 --reparam:type=float:interactive=1 lay0 f 10.0 --reparam:type=float:interactive=1 lay0 third 30.0") -command += testshade ("-g 128 128 --layer testlay -param:type=float[2] scale 5.0,2.0 test_array -od uint8 -o Cout out_array_iter0.tif") -command += testshade ("-g 128 128 --layer testlay -param:type=float[2] scale 5.0,2.0 test_array -iters 2 -reparam:type=float[2] testlay scale 15.0,30.0 -od uint8 -o Cout out_array_iter1.tif") +outputs = [ "out.txt" ] -command += testshade ("-g 128 128 --layer testlay -param:type=color[2] colors 5,5,5,0.5,0.5,0.5 test_colors -od uint8 -o Cout out_colors_iter0.tif") -command += testshade ("-g 128 128 --layer testlay -param:type=color[2] colors 5,5,5,0.5,0.5,0.5 test_colors -iters 2 -reparam:type=color[2] testlay colors 20,20,20,1.0,0.75,0.25 -od uint8 -o Cout out_colors_iter1.tif") - -outputs = [ "out.txt", "out.tif", "out_array_iter0.tif", "out_array_iter1.tif", "out_colors_iter0.tif", "out_colors_iter1.tif" ] - -# expect a few LSB failures -failthresh = 0.004 -failpercent = 0.05 diff --git a/testsuite/reparam/test.osl b/testsuite/reparam/test.osl index b97e38ba2..6cfbc8083 100644 --- a/testsuite/reparam/test.osl +++ b/testsuite/reparam/test.osl @@ -1,10 +1,13 @@ -// Copyright Contributors to the Open Shading Language project. -// SPDX-License-Identifier: BSD-3-Clause -// https://github.com/AcademySoftwareFoundation/OpenShadingLanguage -shader -test (float scale = 20, - output color Cout = 0) +shader test(float g = 0, + float f = 1 [[ int interactive = 1 ]], + float user = 2 [[ int interpolated = 1 ]], + float third = 3 [[ int interpolated = 1 ]], + output color Cout = 0 + ) { - Cout = (float) noise(u*scale, v*scale); + printf("test: f = %g\n", f); + printf("test: user = %g\n", user); + printf("test: third = %g\n", third); + Cout = f + user + third; }