diff --git a/src/cmake/testing.cmake b/src/cmake/testing.cmake index ac55239289..06240a7a48 100644 --- a/src/cmake/testing.cmake +++ b/src/cmake/testing.cmake @@ -262,7 +262,7 @@ macro (osl_add_all_tests) geomath getattribute-camera getattribute-shader getsymbol-nonheap gettextureinfo gettextureinfo-reg globals-needed - group-outputs groupstring + group-outputs groupdata-opt groupstring hash hashnoise hex hyperb ieee_fp ieee_fp-reg if if-reg incdec initlist initops initops-instance-clash diff --git a/src/include/OSL/oslexec.h b/src/include/OSL/oslexec.h index 5bd849b566..670a7a9237 100644 --- a/src/include/OSL/oslexec.h +++ b/src/include/OSL/oslexec.h @@ -285,7 +285,7 @@ class OSLEXECPUBLIC ShadingSystem { /// opt_peephole, opt_coalesce_temps, opt_assign, opt_mix /// opt_merge_instances, opt_merge_instance_with_userdata, /// opt_fold_getattribute, opt_middleman, opt_texture_handle - /// opt_seed_bblock_aliases + /// opt_seed_bblock_aliases, opt_groupdata /// int opt_passes Number of optimization passes per layer (10) /// int llvm_optimize Which of several LLVM optimize strategies (1) /// int llvm_debug Set LLVM extra debug level (0) diff --git a/src/liboslexec/backendllvm.cpp b/src/liboslexec/backendllvm.cpp index 04967e1c20..d1020d6fe5 100644 --- a/src/liboslexec/backendllvm.cpp +++ b/src/liboslexec/backendllvm.cpp @@ -236,8 +236,9 @@ BackendLLVM::getLLVMSymbolBase(const Symbol& sym) llvm_ptr_type(sym.typespec().elementtype())); } - if (sym.symtype() == SymTypeParam || sym.symtype() == SymTypeOutputParam) { - // Special case for params -- they live in the group data + if (sym.symtype() == SymTypeParam + || (sym.symtype() == SymTypeOutputParam && !is_stack_parameter(sym))) { + // Special case for most params -- they live in the group data int fieldnum = m_param_order_map[&sym]; return groupdata_field_ptr(fieldnum, sym.typespec().elementtype().simpletype()); @@ -267,13 +268,26 @@ BackendLLVM::llvm_alloca(const TypeSpec& type, bool derivs, } +bool +BackendLLVM::is_stack_parameter(const Symbol& sym) +{ + if (!shadingsys().m_opt_groupdata) + return false; + + // Some output parameters that are never needed before or + // after layer execution can be relocated from GroupData + // onto the stack. + return sym.symtype() == SymTypeOutputParam && !sym.renderer_output() + && !sym.typespec().is_closure_based() + && !sym.connected(); +} llvm::Value* BackendLLVM::getOrAllocateLLVMSymbol(const Symbol& sym) { OSL_DASSERT( (sym.symtype() == SymTypeLocal || sym.symtype() == SymTypeTemp - || sym.symtype() == SymTypeConst) + || sym.symtype() == SymTypeConst || is_stack_parameter(sym)) && "getOrAllocateLLVMSymbol should only be for local, tmp, const"); Symbol* dealiased = sym.dealias(); std::string mangled_name = dealiased->mangled(); diff --git a/src/liboslexec/backendllvm.h b/src/liboslexec/backendllvm.h index ca6ead6068..e268d5eede 100644 --- a/src/liboslexec/backendllvm.h +++ b/src/liboslexec/backendllvm.h @@ -213,6 +213,10 @@ class BackendLLVM final : public OSOProcessorBase { llvm::Value* llvm_alloca(const TypeSpec& type, bool derivs, const std::string& name = "", int align = 0); + /// Checks if a symbol represents a parameter that can be stored on the + /// stack instead of in GroupData + bool is_stack_parameter(const Symbol& sym); + /// Given the OSL symbol, return the llvm::Value* corresponding to the /// address of the start of that symbol (first element, first component, /// and just the plain value if it has derivatives). This is retrieved diff --git a/src/liboslexec/llvm_instance.cpp b/src/liboslexec/llvm_instance.cpp index e2a3ed4710..a8e88cdc1c 100644 --- a/src/liboslexec/llvm_instance.cpp +++ b/src/liboslexec/llvm_instance.cpp @@ -351,6 +351,10 @@ BackendLLVM::llvm_type_groupdata() TypeSpec ts = sym.typespec(); if (ts.is_structure()) // skip the struct symbol itself continue; + + if (is_stack_parameter(sym)) + continue; + const int arraylen = std::max(1, sym.typespec().arraylength()); const int derivSize = (sym.has_derivs() ? 3 : 1); ts.make_array(arraylen * derivSize); @@ -1238,9 +1242,9 @@ BackendLLVM::build_llvm_instance(bool groupentry) // Skip structure placeholders if (s.typespec().is_structure()) continue; - // Allocate space for locals, temps, aggregate constants + // Allocate space for locals, temps, aggregate constants and some output params if (s.symtype() == SymTypeLocal || s.symtype() == SymTypeTemp - || s.symtype() == SymTypeConst) + || s.symtype() == SymTypeConst || is_stack_parameter(s)) getOrAllocateLLVMSymbol(s); // Set initial value for constants, closures, and strings that are // not parameters. diff --git a/src/liboslexec/oslexec_pvt.h b/src/liboslexec/oslexec_pvt.h index bc25cc371c..fe01a1743c 100644 --- a/src/liboslexec/oslexec_pvt.h +++ b/src/liboslexec/oslexec_pvt.h @@ -894,6 +894,7 @@ class ShadingSystemImpl { bool m_opt_texture_handle; ///< Use texture handles? bool m_opt_seed_bblock_aliases; ///< Turn on basic block alias seeds bool m_opt_useparam; ///< Perform extra useparam analysis for culling run layer calls + bool m_opt_groupdata; ///< Move eligible parameters out of groupdata into locals bool m_opt_batched_analysis; ///< Perform extra analysis required for batched execution? bool m_llvm_jit_fma; ///< Allow fused multiply/add in JIT bool m_llvm_jit_aggressive; ///< Turn on llvm "aggressive" JIT diff --git a/src/liboslexec/shadingsys.cpp b/src/liboslexec/shadingsys.cpp index d269bbcdb2..c5bb698fa5 100644 --- a/src/liboslexec/shadingsys.cpp +++ b/src/liboslexec/shadingsys.cpp @@ -1027,6 +1027,7 @@ ShadingSystemImpl::ShadingSystemImpl(RendererServices* renderer, , m_opt_texture_handle(true) , m_opt_seed_bblock_aliases(true) , m_opt_useparam(false) + , m_opt_groupdata(true) #if OSL_USE_BATCHED , m_opt_batched_analysis((renderer->batched(WidthOf<16>()) != nullptr) || (renderer->batched(WidthOf<8>()) != nullptr)) @@ -1542,6 +1543,7 @@ ShadingSystemImpl::attribute(string_view name, TypeDesc type, const void* val) ATTR_SET("opt_texture_handle", int, m_opt_texture_handle); ATTR_SET("opt_seed_bblock_aliases", int, m_opt_seed_bblock_aliases); ATTR_SET("opt_useparam", int, m_opt_useparam); + ATTR_SET("opt_groupdata", int, m_opt_groupdata); ATTR_SET("opt_batched_analysis", int, m_opt_batched_analysis); ATTR_SET("llvm_jit_fma", int, m_llvm_jit_fma); ATTR_SET("llvm_jit_aggressive", int, m_llvm_jit_aggressive); @@ -1713,6 +1715,7 @@ ShadingSystemImpl::getattribute(string_view name, TypeDesc type, void* val) ATTR_DECODE("opt_texture_handle", int, m_opt_texture_handle); ATTR_DECODE("opt_seed_bblock_aliases", int, m_opt_seed_bblock_aliases); ATTR_DECODE("opt_useparam", int, m_opt_useparam); + ATTR_DECODE("opt_groupdata", int, m_opt_groupdata); ATTR_DECODE("llvm_jit_fma", int, m_llvm_jit_fma); ATTR_DECODE("llvm_jit_aggressive", int, m_llvm_jit_aggressive); ATTR_DECODE_STRING("llvm_jit_target", m_llvm_jit_target); diff --git a/src/testshade/testshade.cpp b/src/testshade/testshade.cpp index b7fb6589c8..43231c22dd 100644 --- a/src/testshade/testshade.cpp +++ b/src/testshade/testshade.cpp @@ -78,6 +78,7 @@ static bool debugnan = false; static bool debug_uninit = false; static bool use_group_outputs = false; static bool do_oslquery = false; +static bool print_groupdata = false; static bool inbuffer = false; static bool use_shade_image = false; static bool userdata_isconnected = false; @@ -739,6 +740,8 @@ getargs(int argc, const char* argv[]) .help("Specify group outputs, not global outputs"); ap.arg("--oslquery", &do_oslquery) .help("Test OSLQuery at runtime"); + ap.arg("--print-groupdata", &print_groupdata) + .help("Print groupdata size to stdout"); ap.arg("--inbuffer", &inbuffer) .help("Compile osl source from and to buffer"); ap.arg("--no-output-placement") @@ -2168,6 +2171,14 @@ test_shade(int argc, const char* argv[]) std::cout << texturesys->getstats(5) << "\n"; std::cout << ustring::getstats() << "\n"; } + if (debug1 || print_groupdata) { + int groupdata_size; + shadingsys->getattribute(shadergroup.get(), "llvm_groupdata_size", + TypeDesc::INT, &groupdata_size); + + std::cout << "Groupdata size: " << groupdata_size << "\n"; + } + // Give the renderer a chance to do initial cleanup while everything is still alive rend->clear(); diff --git a/testsuite/groupdata-opt/OPTIX b/testsuite/groupdata-opt/OPTIX new file mode 100644 index 0000000000..e69de29bb2 diff --git a/testsuite/groupdata-opt/connected_output.osl b/testsuite/groupdata-opt/connected_output.osl new file mode 100644 index 0000000000..e34ce1b427 --- /dev/null +++ b/testsuite/groupdata-opt/connected_output.osl @@ -0,0 +1,10 @@ +// Copyright Contributors to the Open Shading Language project. +// SPDX-License-Identifier: BSD-3-Clause +// https://github.com/AcademySoftwareFoundation/OpenShadingLanguage + +// upstream shader connects directly to output parameter; +// make sure we don't optimize it onto stack +shader connected_output_shader(output float Val_Out = -2.) +{ + printf("side-effect\n"); +} diff --git a/testsuite/groupdata-opt/input.osl b/testsuite/groupdata-opt/input.osl new file mode 100644 index 0000000000..f3fa7eb642 --- /dev/null +++ b/testsuite/groupdata-opt/input.osl @@ -0,0 +1,8 @@ +// Copyright Contributors to the Open Shading Language project. +// SPDX-License-Identifier: BSD-3-Clause +// https://github.com/AcademySoftwareFoundation/OpenShadingLanguage + +shader input_shader(output float Val_Out = -1.) +{ + Val_Out = float(cellnoise(u, v)); +} diff --git a/testsuite/groupdata-opt/main.osl b/testsuite/groupdata-opt/main.osl new file mode 100644 index 0000000000..014a825f05 --- /dev/null +++ b/testsuite/groupdata-opt/main.osl @@ -0,0 +1,8 @@ +// Copyright Contributors to the Open Shading Language project. +// SPDX-License-Identifier: BSD-3-Clause +// https://github.com/AcademySoftwareFoundation/OpenShadingLanguage + +shader main_shader(float Val_In = -3.) +{ + printf("Val_in = %f\n", Val_In); +} diff --git a/testsuite/groupdata-opt/ref/out.txt b/testsuite/groupdata-opt/ref/out.txt new file mode 100644 index 0000000000..86f8d3650e --- /dev/null +++ b/testsuite/groupdata-opt/ref/out.txt @@ -0,0 +1,17 @@ +Compiled connected_output.osl -> connected_output_shader.oso +Compiled input.osl -> input_shader.oso +Compiled main.osl -> main_shader.oso +Connect input_layer.Val_Out to main_layer.Val_In +Val_in = 0.860313 + +Groupdata size: 12 +Connect input_layer.Val_Out to main_layer.Val_In +Val_in = 0.860313 + +Groupdata size: 8 +Connect input_layer.Val_Out to test_layer.Val_Out +Connect test_layer.Val_Out to main_layer.Val_In +side-effect +Val_in = 0.860313 + +Groupdata size: 12 diff --git a/testsuite/groupdata-opt/run.py b/testsuite/groupdata-opt/run.py new file mode 100755 index 0000000000..b225d0a627 --- /dev/null +++ b/testsuite/groupdata-opt/run.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python + +# Copyright Contributors to the Open Shading Language project. +# SPDX-License-Identifier: BSD-3-Clause +# https://github.com/AcademySoftwareFoundation/OpenShadingLanguage + +shader_commands = " ".join([ + "-shader input_shader input_layer", + "-shader main_shader main_layer", + "-connect input_layer Val_Out main_layer Val_In", +]) +for opt in [0, 1]: + # Assert that groupdata is sized differently based on opt_groupdata param + command += testshade("{} --options opt_groupdata={} --print-groupdata".format(shader_commands, opt)) + +# Assert that opt_groupdata correctly skips connected output parameters +# Other skip conditions (renderer outputs, closures) are covered by existing tests +shader_commands = " ".join([ + "-shader input_shader input_layer", + "-shader connected_output_shader test_layer", + "-shader main_shader main_layer", + "-connect input_layer Val_Out test_layer Val_Out", + "-connect test_layer Val_Out main_layer Val_In", +]) +command += testshade("{} --print-groupdata".format(shader_commands))