diff --git a/benchmark/AlgorithmicComplexity.cpp b/benchmark/AlgorithmicComplexity.cpp index 9717cb0cd..15089a24e 100644 --- a/benchmark/AlgorithmicComplexity.cpp +++ b/benchmark/AlgorithmicComplexity.cpp @@ -19,7 +19,7 @@ static void BM_NumericGausP(benchmark::State& state) { double p[] = {1, 2, 3, 4, 5}; double dx[5] = {0, 0, 0, 0, 0}; double dp[5] = {0, 0, 0, 0, 0}; - clad::tape> results = {}; + clad::old_tape> results = {}; int dim = 5; results.emplace_back(dx, dim); results.emplace_back(dp, dim); diff --git a/demos/CustomTypeNumDiff.cpp b/demos/CustomTypeNumDiff.cpp index 0f92c22f9..33bc6adb1 100644 --- a/demos/CustomTypeNumDiff.cpp +++ b/demos/CustomTypeNumDiff.cpp @@ -134,7 +134,7 @@ int main() { // This is how we return the derivative with respect to all arguments. // The order of being placed in this tape should be the same as the order of // the arguments being passed to the function. - clad::tape> grad = {}; // Place the l-value reference of the variables in the tape. diff --git a/include/clad/Differentiator/Differentiator.h b/include/clad/Differentiator/Differentiator.h index cca1cd5cf..690e232ed 100644 --- a/include/clad/Differentiator/Differentiator.h +++ b/include/clad/Differentiator/Differentiator.h @@ -14,6 +14,7 @@ #include "DynamicGraph.h" #include "FunctionTraits.h" #include "Matrix.h" +#include "NewTape.h" #include "NumericalDiff.h" #include "Tape.h" @@ -47,7 +48,9 @@ inline CUDA_HOST_DEVICE unsigned int GetLength(const char* code) { #endif /// Tape type used for storing values in reverse-mode AD inside loops. -template using tape = tape_impl; +template using tape = new_tape_impl; + +template using old_tape = tape_impl; /// Add value to the end of the tape, return the same value. template diff --git a/include/clad/Differentiator/NewTape.h b/include/clad/Differentiator/NewTape.h new file mode 100644 index 000000000..743ca5dd3 --- /dev/null +++ b/include/clad/Differentiator/NewTape.h @@ -0,0 +1,119 @@ +#ifndef CLAD_DIFFERENTIATOR_NEWTAPE_H +#define CLAD_DIFFERENTIATOR_NEWTAPE_H + +#include +#include +#include +#include + +#include "clad/Differentiator/CladConfig.h" + +namespace clad { + +static const int capacity = 32; + +template class Block { +public: + T data[capacity]; + Block* next; + Block* prev; + using pointer = T*; + using iterator = pointer; + + CUDA_HOST_DEVICE Block() { + } + + CUDA_HOST_DEVICE ~Block() { destroy(block_begin(), block_end()); } + + Block(const Block& other) = delete; + Block& operator=(const Block& other) = delete; + + Block(Block&& other) = delete; + Block& operator=(const Block&& other) = delete; + + CUDA_HOST_DEVICE iterator block_begin() { return data; } + + CUDA_HOST_DEVICE iterator block_end() { return data + capacity; } + + template using value_type_of = decltype(*std::declval()); + + template + static typename std::enable_if< + !std::is_trivially_destructible>::value>::type + destroy(It B, It E) { + for (It I = E - 1; I >= B; --I) + I->~value_type_of(); + } + + template + static typename std::enable_if< + std::is_trivially_destructible>::value>::type + CUDA_HOST_DEVICE + destroy(It B, It E) {} +}; + +template class new_tape_impl { + using NonConstT = typename std::remove_cv::type; + + Block* m_cur_block = nullptr; + std::size_t m_size = 0; + +public: + new_tape_impl() = default; + + ~new_tape_impl() { } + + new_tape_impl(new_tape_impl& other) = delete; + new_tape_impl operator=(new_tape_impl& other) = delete; + + new_tape_impl(new_tape_impl&& other) = delete; + new_tape_impl& operator=(new_tape_impl&& other) = delete; + + template + + CUDA_HOST_DEVICE void emplace_back(ArgsT&&... args) { + if (!m_cur_block || m_size >= capacity) { + Block* prev_block = m_cur_block; + m_cur_block = static_cast*>(::operator new(sizeof(Block))); + if (prev_block != nullptr) { + prev_block->next = m_cur_block; + m_cur_block->prev = prev_block; + } + m_size = 0; + } + m_size += 1; + ::new (const_cast(static_cast(end()))) + T(std::forward(args)...); + } + + [[nodiscard]] CUDA_HOST_DEVICE std::size_t size() const { return m_size; } + + CUDA_HOST_DEVICE T* end() { return m_cur_block->data + (m_size - 1); } + + CUDA_HOST_DEVICE T& back() { + assert(m_size || m_cur_block->prev); + return *end(); + } + + CUDA_HOST_DEVICE void pop_back() { + assert(m_size || m_cur_block->prev); + m_size -= 1; + if (m_size == 0) { + Block* temp = m_cur_block; + m_cur_block = m_cur_block->prev; + // delete temp; + m_size = capacity; + } + } + + void destroy() { + while (m_cur_block != nullptr) { + Block* prev_block = m_cur_block->prev; + delete m_cur_block; + m_cur_block = prev_block; + } + } +}; +} // namespace clad + +#endif // CLAD_DIFFERENTIATOR_NEWTAPE_H diff --git a/test/NumericalDiff/PureCentralDiffCalls.C b/test/NumericalDiff/PureCentralDiffCalls.C index 399e7f784..8038229b6 100644 --- a/test/NumericalDiff/PureCentralDiffCalls.C +++ b/test/NumericalDiff/PureCentralDiffCalls.C @@ -73,7 +73,7 @@ int main() { // expected-no-diagnostics printf("Result is = %f\n", func1_res); // CHECK-EXEC: Result is = 2.000000 // Gradients, derivative wrt all args - clad::tape> grad = {}; + clad::old_tape> grad = {}; grad.emplace_back(dx, 3); grad.emplace_back(&dy); grad.emplace_back(&dz);