Add support for differentiation of immediate functions #1109

Merged · 11 commits · Oct 30, 2024
1 change: 1 addition & 0 deletions docs/userDocs/source/index.rst
@@ -90,6 +90,7 @@ The User Guide
user/tutorials
user/UsingEnzymeWithinClad
user/UsingVectorMode.rst
user/UsingImmediateMode
user/FAQ
user/DevelopersDocumentation
user/IntroductionToClangForCladContributors
71 changes: 71 additions & 0 deletions docs/userDocs/source/user/UsingImmediateMode.rst
@@ -0,0 +1,71 @@
Using Clad-generated derivatives in an immediate context
**********************************************************

The derivatives that Clad generates are valid C++ code, which can in principle
be executed at compile time (or in an immediate context, as the C++ standard
calls it). When a function is differentiated, all specifiers such as
`constexpr` and `consteval` are kept, but it is important to understand the
interface that Clad provides for those derivatives to the user.

When Clad differentiates a function (e.g. with `clad::differentiate`) the user
receives a `CladFunction`, which contains a function pointer to the generated
derivative, among other things. Unfortunately, the C++ standard heavily
restricts how function pointers may be handled in an immediate context, and
care must be taken not to violate those rules, or the compiler will refuse to
evaluate our `constexpr`/`consteval` functions during translation.
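
The underlying language rule can be demonstrated without Clad at all (a
minimal sketch, assuming C++20)::

    consteval int sq(int x) { return x * x; }

    // The body of a consteval function is an immediate function context,
    // so naming sq and calling it through a pointer is allowed here.
    consteval int apply() {
      auto* fp = &sq;
      return fp(3);
    }

    int main() {
      constexpr int nine = apply(); // evaluated during translation
      // auto* bad = &sq; // ill-formed: a pointer to an immediate
      //                  // function would escape the immediate context
      return nine - 9;
    }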

Currently, to get a `CladFunction` that is usable in immediate mode, the user
has to pass `clad::immediate_mode` to the differentiation function. This
removes the ability to dump the generated derivative, although support for
that may be added in the future.

Usage of Clad's immediate mode
================================================

The following code snippet shows how one can request Clad to use the immediate
mode for differentiation::

    #include "clad/Differentiator/Differentiator.h"

    #include <cstdio>

    constexpr double fn(double x, double y) {
      return (x + y) / 2;
    }

    constexpr double fn_test() {
      auto dx = clad::differentiate<clad::immediate_mode>(fn, "x");

      return dx.execute(4, 7);
    }

    int main() {
      constexpr double fn_result = fn_test();

      printf("%.2f\n", fn_result);
    }
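
Since the derivative of `(x + y) / 2` with respect to `x` is `0.5`, the call
`dx.execute(4, 7)` evaluates to `0.5` during translation and the program
simply prints `0.50`.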

It is necessary both to pass the `clad::immediate_mode` option to
`clad::differentiate` and to keep the call to `clad::differentiate` together
with all of its `.execute(...)` calls in the same immediate context, as the
C++ standard forbids a pointer to an immediate function from existing outside
of an immediate context. (It is not possible to do the differentiation and the
executions in `main`, as `dx` would contain such a pointer, but `main` is not
and cannot be immediate.)

When using `constexpr` there is no easy way to tell whether the functions are
actually evaluated during translation. It is therefore a good idea either to
use `consteval` or `if consteval` (in C++23 and newer) to check that the
immediate contexts behave as expected, or to assign the results to a variable
marked `constexpr`, as that fails to compile if the assigned expression is not
a constant expression.
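
For example, the result of `fn_test` from the snippet above can be verified
entirely during translation (a minimal sketch; the expected value follows from
d/dx (x + y)/2 = 0.5)::

    // Fails to compile unless fn_test() is a constant expression, i.e.
    // unless the differentiation and execution happen during translation.
    constexpr double dfdx = fn_test();
    static_assert(dfdx == 0.5, "expected d/dx (x + y)/2 == 0.5");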

Use cases supported by Clad's immediate mode
================================================

Currently, Clad's immediate mode is primarily meant to be used in the forward
mode (`clad::differentiate`), as the internal data structures Clad needs for
differentiating loops, etc. are not yet usable in an immediate context.

Both `constexpr` and `consteval` are supported: Clad does not rely on these
specific keywords, but instead uses Clang's API to determine whether a
function is immediate and should therefore be differentiated earlier.
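
A `consteval` function is differentiated in the same way (a minimal sketch,
analogous to the `constexpr` example above)::

    consteval double gn(double x, double y) {
      return x * x + y;
    }

    consteval double gn_test() {
      auto dgn = clad::differentiate<clad::immediate_mode>(gn, "x");
      return dgn.execute(3, 1); // d/dx (x*x + y) at x = 3 is 6
    }

    static_assert(gn_test() == 6.0);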
58 changes: 31 additions & 27 deletions include/clad/Differentiator/ArrayRef.h
@@ -25,18 +25,19 @@ template <typename T> class array_ref {
array_ref() = default;
/// Constructor to store the pointer to and size of an array supplied by the
/// user
CUDA_HOST_DEVICE array_ref(T* arr, std::size_t size)
constexpr CUDA_HOST_DEVICE array_ref(T* arr, std::size_t size)
: m_arr(arr), m_size(size) {}
/// Constructor for arrays having size equal to 1 or non pointer types to
/// store their addresses
CUDA_HOST_DEVICE array_ref(T* a) : m_arr(a), m_size(1) {}
constexpr CUDA_HOST_DEVICE array_ref(T* a) : m_arr(a), m_size(1) {}
/// Constructor for clad::array types
CUDA_HOST_DEVICE array_ref(array<T>& a) : m_arr(a.ptr()), m_size(a.size()) {}
constexpr CUDA_HOST_DEVICE array_ref(array<T>& a)
: m_arr(a.ptr()), m_size(a.size()) {}

/// Operator for conversion from array_ref<T> to T*.
CUDA_HOST_DEVICE operator T*() { return m_arr; }
constexpr CUDA_HOST_DEVICE operator T*() { return m_arr; }
/// Operator for conversion from array_ref<T> to const T*.
CUDA_HOST_DEVICE operator const T*() const { return m_arr; }
constexpr CUDA_HOST_DEVICE operator const T*() const { return m_arr; }

template <typename U>
CUDA_HOST_DEVICE array_ref<T>& operator=(const array<U>& a) {
@@ -46,25 +46,26 @@ template <typename T> class array_ref {
return *this;
}
template <typename U>
CUDA_HOST_DEVICE array_ref<T>& operator=(const array_ref<T>& a) {
constexpr CUDA_HOST_DEVICE array_ref<T>& operator=(const array_ref<T>& a) {
m_arr = a.ptr();
m_size = a.size();
return *this;
}
/// Returns the size of the underlying array
CUDA_HOST_DEVICE std::size_t size() const { return m_size; }
CUDA_HOST_DEVICE PUREFUNC T* ptr() const { return m_arr; }
CUDA_HOST_DEVICE PUREFUNC T*& ptr_ref() { return m_arr; }
constexpr CUDA_HOST_DEVICE std::size_t size() const { return m_size; }
Contributor review comment:

warning: function 'size' should be marked [[nodiscard]] [modernize-use-nodiscard]

Suggested change:
constexpr CUDA_HOST_DEVICE std::size_t size() const { return m_size; }
[[nodiscard]] constexpr CUDA_HOST_DEVICE std::size_t size() const { return m_size; }

constexpr CUDA_HOST_DEVICE PUREFUNC T* ptr() const { return m_arr; }
constexpr CUDA_HOST_DEVICE PUREFUNC T*& ptr_ref() { return m_arr; }
/// Returns an array_ref to a part of the underlying array starting at
/// offset and having the specified size
CUDA_HOST_DEVICE array_ref<T> slice(std::size_t offset, std::size_t size) {
constexpr CUDA_HOST_DEVICE array_ref<T> slice(std::size_t offset,
std::size_t size) {
assert((offset >= 0) && (offset + size <= m_size) &&
"Window is outside array. Please provide an offset and size "
"inside the array size.");
return array_ref<T>(&m_arr[offset], size);
}
/// Returns the reference to the underlying array
CUDA_HOST_DEVICE PUREFUNC T& operator*() { return *m_arr; }
constexpr CUDA_HOST_DEVICE PUREFUNC T& operator*() { return *m_arr; }

// Arithmetic overloads
/// Divides the arrays element wise
@@ -171,7 +173,7 @@

/// Multiplies the arrays element wise
template <typename T, typename U>
CUDA_HOST_DEVICE
constexpr CUDA_HOST_DEVICE
array_expression<const array_ref<T>&, BinaryMul, const array_ref<U>&>
operator*(const array_ref<T>& Ar, const array_ref<U>& Br) {
assert(Ar.size() == Br.size() &&
@@ -183,7 +185,7 @@ CUDA_HOST_DEVICE

/// Adds the arrays element wise
template <typename T, typename U>
CUDA_HOST_DEVICE
constexpr CUDA_HOST_DEVICE
array_expression<const array_ref<T>&, BinaryAdd, const array_ref<U>&>
operator+(const array_ref<T>& Ar, const array_ref<U>& Br) {
assert(Ar.size() == Br.size() &&
@@ -195,7 +197,7 @@ CUDA_HOST_DEVICE

/// Subtracts the arrays element wise
template <typename T, typename U>
CUDA_HOST_DEVICE
constexpr CUDA_HOST_DEVICE
array_expression<const array_ref<T>&, BinarySub, const array_ref<U>&>
operator-(const array_ref<T>& Ar, const array_ref<U>& Br) {
assert(
@@ -208,7 +210,7 @@ CUDA_HOST_DEVICE

/// Divides the arrays element wise
template <typename T, typename U>
CUDA_HOST_DEVICE
constexpr CUDA_HOST_DEVICE
array_expression<const array_ref<T>&, BinaryDiv, const array_ref<U>&>
operator/(const array_ref<T>& Ar, const array_ref<U>& Br) {
assert(Ar.size() == Br.size() &&
@@ -221,55 +223,55 @@ CUDA_HOST_DEVICE
/// Multiplies array_ref by a scalar
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<const array_ref<T>&, BinaryMul, U>
constexpr CUDA_HOST_DEVICE array_expression<const array_ref<T>&, BinaryMul, U>
operator*(const array_ref<T>& Ar, U a) {
return array_expression<const array_ref<T>&, BinaryMul, U>(Ar, a);
}

/// Multiplies array_ref by a scalar (reverse order)
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<const array_ref<T>&, BinaryMul, U>
constexpr CUDA_HOST_DEVICE array_expression<const array_ref<T>&, BinaryMul, U>
operator*(U a, const array_ref<T>& Ar) {
return array_expression<const array_ref<T>&, BinaryMul, U>(Ar, a);
}

/// Divides array_ref by a scalar
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<const array_ref<T>&, BinaryDiv, U>
constexpr CUDA_HOST_DEVICE array_expression<const array_ref<T>&, BinaryDiv, U>
operator/(const array_ref<T>& Ar, U a) {
return array_expression<const array_ref<T>&, BinaryDiv, U>(Ar, a);
}

/// Adds array_ref by a scalar
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<const array_ref<T>&, BinaryAdd, U>
constexpr CUDA_HOST_DEVICE array_expression<const array_ref<T>&, BinaryAdd, U>
operator+(const array_ref<T>& Ar, U a) {
return array_expression<const array_ref<T>&, BinaryAdd, U>(Ar, a);
}

/// Adds array_ref by a scalar (reverse order)
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<const array_ref<T>&, BinaryAdd, U>
constexpr CUDA_HOST_DEVICE array_expression<const array_ref<T>&, BinaryAdd, U>
operator+(U a, const array_ref<T>& Ar) {
return array_expression<const array_ref<T>&, BinaryAdd, U>(Ar, a);
}

/// Subtracts array_ref by a scalar
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<const array_ref<T>&, BinarySub, U>
constexpr CUDA_HOST_DEVICE array_expression<const array_ref<T>&, BinarySub, U>
operator-(const array_ref<T>& Ar, U a) {
return array_expression<const array_ref<T>&, BinarySub, U>(Ar, a);
}

/// Subtracts array_ref by a scalar (reverse order)
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<U, BinarySub, const array_ref<T>&>
constexpr CUDA_HOST_DEVICE array_expression<U, BinarySub, const array_ref<T>&>
operator-(U a, const array_ref<T>& Ar) {
return array_expression<U, BinarySub, const array_ref<T>&>(a, Ar);
}
@@ -303,16 +305,18 @@ operator-(U a, const array_ref<T>& Ar) {
template <typename T, class = typename std::enable_if<
std::is_pointer<T>::value ||
std::is_same<T, std::nullptr_t>::value>::type>
CUDA_HOST_DEVICE array_ref(T arr, std::size_t size = 1)
constexpr CUDA_HOST_DEVICE array_ref(T arr, std::size_t size = 1)
: m_arr((void*)arr), m_size(size) {}
template <typename T>
CUDA_HOST_DEVICE array_ref(const array_ref<T>& other)
constexpr CUDA_HOST_DEVICE array_ref(const array_ref<T>& other)
: m_arr(other.ptr()), m_size(other.size()) {}
template <typename T> CUDA_HOST_DEVICE operator array_ref<T>() {
template <typename T> constexpr CUDA_HOST_DEVICE operator array_ref<T>() {
return array_ref<T>((T*)(m_arr), m_size);
}
CUDA_HOST_DEVICE void* ptr() const { return m_arr; }
CUDA_HOST_DEVICE std::size_t size() const { return m_size; }
[[nodiscard]] constexpr CUDA_HOST_DEVICE void* ptr() const { return m_arr; }
[[nodiscard]] constexpr CUDA_HOST_DEVICE std::size_t size() const {
return m_size;
}
};
// NOLINTEND(*-pointer-arithmetic)
} // namespace clad
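
With these `constexpr` annotations, an `array_ref` can now be built and
queried during constant evaluation (a minimal sketch, not part of the patch):

    constexpr double first_of_three() {
      double buf[3] = {1.0, 2.0, 3.0};
      clad::array_ref<double> ref(buf, 3); // constexpr constructor
      return ref.size() == 3 ? *ref : 0.0; // constexpr size() and operator*
    }
    static_assert(first_of_three() == 1.0);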
3 changes: 3 additions & 0 deletions include/clad/Differentiator/CladConfig.h
@@ -34,6 +34,9 @@ enum opts : unsigned {

// Specifying whether we only want the diagonal of the hessian.
diagonal_only = 1 << (ORDER_BITS + 4),

// Specify that we need a constexpr-enabled CladFunction
immediate_mode = 1 << (ORDER_BITS + 7),
}; // enum opts
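
Note that the flag lives above `ORDER_BITS`, so it composes with the encoded
derivative order by bitwise OR (a hypothetical illustration, assuming
`GetDerivativeOrder` below simply masks off the option bits):

    // Hypothetical: a request for a second-order derivative in immediate mode.
    static_assert(clad::GetDerivativeOrder(2U | clad::immediate_mode) == 2);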

constexpr unsigned GetDerivativeOrder(const unsigned bitmasked_opts) {
3 changes: 3 additions & 0 deletions include/clad/Differentiator/DiffPlanner.h
@@ -65,6 +65,9 @@ struct DiffRequest {
/// A flag to enable TBR analysis during reverse-mode differentiation.
bool EnableTBRAnalysis = false;
bool EnableVariedAnalysis = false;
/// A flag specifying whether this differentiation is to be used
/// in immediate contexts.
bool ImmediateMode = false;
Contributor review comment:

warning: member variable 'ImmediateMode' has public visibility [cppcoreguidelines-non-private-member-variables-in-classes]

  bool ImmediateMode = false;
       ^

/// Puts the derived function and its code in the diff call
void updateCall(clang::FunctionDecl* FD, clang::FunctionDecl* OverloadedFD,
clang::Sema& SemaRef);