vgvassilev · PetroZarytskyi · Oct 8, 2024 · Oct 9, 2024 · Oct 25, 2024 · Oct 3, 2024
diff --git a/include/clad/Differentiator/Array.h b/include/clad/Differentiator/Array.h
@@ -446,6 +446,46 @@ operator/(const array<T>& arr1, const array<U>& arr2) {
                                                                        arr2);
 }
 
+/// Function to define element wise adding of an arrays and an array_ref.
+template <typename T, typename U>
+CUDA_HOST_DEVICE
+    array_expression<const array<T>&, BinaryAdd, const array_ref<U>&>
+    operator+(const array<T>& arr1, const array_ref<U>& arr2) {
+  assert(arr1.size() == arr2.size());
+  return array_expression<const array<T>&, BinaryAdd, const array_ref<U>&>(
+      arr1, arr2);
+}
+
+/// Function to define element wise adding of an arrays_ref and an array.
+template <typename T, typename U>
+CUDA_HOST_DEVICE
+    array_expression<const array_ref<T>&, BinaryAdd, const array<U>&>
+    operator+(const array_ref<T>& arr1, const array<U>& arr2) {
+  assert(arr1.size() == arr2.size());
+  return array_expression<const array_ref<T>&, BinaryAdd, const array<U>&>(
+      arr1, arr2);
+}
+
+/// Function to define element wise adding of an arrays and an array_ref.
+template <typename T, typename U>
+CUDA_HOST_DEVICE
+    array_expression<const array<T>&, BinarySub, const array_ref<U>&>
+    operator-(const array<T>& arr1, const array_ref<U>& arr2) {
+  assert(arr1.size() == arr2.size());
+  return array_expression<const array<T>&, BinarySub, const array_ref<U>&>(
+      arr1, arr2);
+}
+
+/// Function to define element wise adding of an arrays_ref and an array.
+template <typename T, typename U>
+CUDA_HOST_DEVICE
+    array_expression<const array_ref<T>&, BinarySub, const array<U>&>
+    operator-(const array_ref<T>& arr1, const array<U>& arr2) {
+  assert(arr1.size() == arr2.size());
+  return array_expression<const array_ref<T>&, BinarySub, const array<U>&>(
+      arr1, arr2);
+}
+
 } // namespace clad
 
 #endif // CLAD_ARRAY_H
diff --git a/include/clad/Differentiator/ArrayExpression.h b/include/clad/Differentiator/ArrayExpression.h
@@ -121,6 +121,45 @@ class array_expression {
         const array_expression<LeftExp, BinaryOp, RightExp>&, BinaryDiv, RE>(
         *this, r);
   }
+  // Operator overload for addition.
+  template <typename L1, typename BinOp1, typename R1, typename L2,
+            typename BinOp2, typename R2>
+  array_expression<const array_expression<L1, BinOp1, R1>&, BinaryAdd,
+                   const array_expression<L2, BinOp2, R2>&>
+  operator+(const array_expression<L2, BinOp2, R2>& r) const {
+    return array_expression<const array_expression<L1, BinOp1, R1>&, BinaryAdd,
+                            const array_expression<L2, BinOp2, R2>&>(*this, r);
+  }
+
+  // Operator overload for multiplication.
+  template <typename L1, typename BinOp1, typename R1, typename L2,
+            typename BinOp2, typename R2>
+  array_expression<const array_expression<L1, BinOp1, R1>&, BinarySub,
+                   const array_expression<L2, BinOp2, R2>&>
+  operator*(const array_expression<L2, BinOp2, R2>& r) const {
+    return array_expression<const array_expression<L1, BinOp1, R1>&, BinaryMul,
+                            const array_expression<L2, BinOp2, R2>&>(*this, r);
+  }
+
+  // Operator overload for subtraction.
+  template <typename L1, typename BinOp1, typename R1, typename L2,
+            typename BinOp2, typename R2>
+  array_expression<const array_expression<L1, BinOp1, R1>&, BinarySub,
+                   const array_expression<L2, BinOp2, R2>&>
+  operator-(const array_expression<L2, BinOp2, R2>& r) const {
+    return array_expression<const array_expression<L1, BinOp1, R1>&, BinarySub,
+                            const array_expression<L2, BinOp2, R2>&>(*this, r);
+  }
+
+  // Operator overload for division.
+  template <typename L1, typename BinOp1, typename R1, typename L2,
+            typename BinOp2, typename R2>
+  array_expression<const array_expression<L1, BinOp1, R1>&, BinaryDiv,
+                   const array_expression<L2, BinOp2, R2>&>
+  operator/(const array_expression<L2, BinOp2, R2>& r) const {
+    return array_expression<const array_expression<L1, BinOp1, R1>&, BinaryDiv,
+                            const array_expression<L2, BinOp2, R2>&>(*this, r);
+  }
 };
 
 // Operator overload for addition, when the right operand is an array_expression

diff --git a/include/clad/Differentiator/ArrayRef.h b/include/clad/Differentiator/ArrayRef.h
@@ -52,6 +52,14 @@ template <typename T> class array_ref {
     m_size = a.size();
     return *this;
   }
+  template <typename L, typename BinaryOp, typename R>
+  CUDA_HOST_DEVICE array_ref<T>&
+  operator=(const array_expression<L, BinaryOp, R>& arr_exp) {
+    assert(arr_exp.size() == m_size);
+    for (std::size_t i = 0; i < m_size; i++)
+      m_arr[i] = arr_exp[i];
+    return *this;
+  }
   /// Returns the size of the underlying array
   constexpr CUDA_HOST_DEVICE std::size_t size() const { return m_size; }
   constexpr CUDA_HOST_DEVICE PUREFUNC T* ptr() const { return m_arr; }
@@ -71,7 +79,7 @@ template <typename T> class array_ref {
   // Arithmetic overloads
   /// Divides the arrays element wise
   template <typename U>
-  CUDA_HOST_DEVICE array_ref<T>& operator/=(array_ref<U>& Ar) {
+  CUDA_HOST_DEVICE array_ref<T>& operator/=(const array_ref<U>& Ar) {
     assert(m_size == Ar.size() && "Size of both the array_refs must be equal "
                                   "for carrying out addition assignment");
     for (std::size_t i = 0; i < m_size; i++)
@@ -80,7 +88,7 @@ template <typename T> class array_ref {
   }
   /// Multiplies the arrays element wise
   template <typename U>
-  CUDA_HOST_DEVICE array_ref<T>& operator*=(array_ref<U>& Ar) {
+  CUDA_HOST_DEVICE array_ref<T>& operator*=(const array_ref<U>& Ar) {
     assert(m_size == Ar.size() && "Size of both the array_refs must be equal "
                                   "for carrying out addition assignment");
     for (std::size_t i = 0; i < m_size; i++)
@@ -89,7 +97,7 @@ template <typename T> class array_ref {
   }
   /// Adds the arrays element wise
   template <typename U>
-  CUDA_HOST_DEVICE array_ref<T>& operator+=(array_ref<U>& Ar) {
+  CUDA_HOST_DEVICE array_ref<T>& operator+=(const array_ref<U>& Ar) {
     assert(m_size == Ar.size() && "Size of both the array_refs must be equal "
                                   "for carrying out addition assignment");
     for (std::size_t i = 0; i < m_size; i++)
@@ -98,36 +106,76 @@ template <typename T> class array_ref {
   }
   /// Subtracts the arrays element wise
   template <typename U>
-  CUDA_HOST_DEVICE array_ref<T>& operator-=(array_ref<U>& Ar) {
+  CUDA_HOST_DEVICE array_ref<T>& operator-=(const array_ref<U>& Ar) {
     assert(m_size == Ar.size() && "Size of both the array_refs must be equal "
                                   "for carrying out addition assignment");
     for (std::size_t i = 0; i < m_size; i++)
       m_arr[i] -= Ar[i];
     return *this;
   }
   /// Divides the elements of the array_ref by elements of the array
-  template <typename U> CUDA_HOST_DEVICE array_ref<T>& operator/=(array<U>& A) {
+  template <typename U>
+  CUDA_HOST_DEVICE array_ref<T>& operator/=(const array<U>& A) {
     assert(m_size == A.size() && "Size of arrays must be equal");
     for (std::size_t i = 0; i < m_size; i++)
       m_arr[i] /= A[i];
     return *this;
   }
   /// Multiplies the elements of the array_ref by elements of the array
-  template <typename U> CUDA_HOST_DEVICE array_ref<T>& operator*=(array<U>& A) {
+  template <typename L, typename BinaryOp, typename R>
+  CUDA_HOST_DEVICE array_ref<T>&
+  operator*=(const array_expression<L, BinaryOp, R>& arr_exp) {
+    assert(arr_exp.size() == m_size);
+    for (std::size_t i = 0; i < m_size; i++)
+      m_arr[i] *= arr_exp[i];
+    return *this;
+  }
+  /// Adds the elements of the array_ref by elements of the array
+  template <typename L, typename BinaryOp, typename R>
+  CUDA_HOST_DEVICE array_ref<T>&
+  operator+=(const array_expression<L, BinaryOp, R>& arr_exp) {
+    assert(arr_exp.size() == m_size);
+    for (std::size_t i = 0; i < m_size; i++)
+      m_arr[i] += arr_exp[i];
+    return *this;
+  }
+  /// Subtracts the elements of the array_ref by elements of the array
+  template <typename L, typename BinaryOp, typename R>
+  CUDA_HOST_DEVICE array_ref<T>&
+  operator-=(const array_expression<L, BinaryOp, R>& arr_exp) {
+    assert(arr_exp.size() == m_size);
+    for (std::size_t i = 0; i < m_size; i++)
+      m_arr[i] -= arr_exp[i];
+    return *this;
+  }
+  /// Divides the elements of the array_ref by elements of the array
+  template <typename L, typename BinaryOp, typename R>
+  CUDA_HOST_DEVICE array_ref<T>&
+  operator/=(const array_expression<L, BinaryOp, R>& arr_exp) {
+    assert(arr_exp.size() == m_size);
+    for (std::size_t i = 0; i < m_size; i++)
+      m_arr[i] /= arr_exp[i];
+    return *this;
+  }
+  /// Multiplies the elements of the array_ref by elements of the array
+  template <typename U>
+  CUDA_HOST_DEVICE array_ref<T>& operator*=(const array<U>& A) {
     assert(m_size == A.size() && "Size of arrays must be equal");
     for (std::size_t i = 0; i < m_size; i++)
       m_arr[i] *= A[i];
     return *this;
   }
   /// Adds the elements of the array_ref by elements of the array
-  template <typename U> CUDA_HOST_DEVICE array_ref<T>& operator+=(array<U>& A) {
+  template <typename U>
+  CUDA_HOST_DEVICE array_ref<T>& operator+=(const array<U>& A) {
     assert(m_size == A.size() && "Size of arrays must be equal");
     for (std::size_t i = 0; i < m_size; i++)
       m_arr[i] += A[i];
     return *this;
   }
   /// Subtracts the elements of the array_ref by elements of the array
-  template <typename U> CUDA_HOST_DEVICE array_ref<T>& operator-=(array<U>& A) {
+  template <typename U>
+  CUDA_HOST_DEVICE array_ref<T>& operator-=(const array<U>& A) {
     assert(m_size == A.size() && "Size of arrays must be equal");
     for (std::size_t i = 0; i < m_size; i++)
       m_arr[i] -= A[i];

diff --git a/include/clad/Differentiator/BuiltinDerivatives.h b/include/clad/Differentiator/BuiltinDerivatives.h
@@ -199,31 +199,31 @@ CUDA_HOST_DEVICE inline void __builtin_powf_pullback(float x, float exponent,
 // FIXME: Add the rest of the __builtin_ routines for log, sqrt, abs, etc.
 
 namespace std {
-template <typename T>
-CUDA_HOST_DEVICE ValueAndPushforward<T, T> abs_pushforward(T x, T d_x) {
+template <typename T, typename dT>
+CUDA_HOST_DEVICE ValueAndPushforward<T, dT> abs_pushforward(T x, dT d_x) {
   if (x >= 0)
     return {x, d_x};
   else
     return {-x, -d_x};
 }
 
-template <typename T>
-CUDA_HOST_DEVICE ValueAndPushforward<T, T> exp_pushforward(T x, T d_x) {
+template <typename T, typename dT>
+CUDA_HOST_DEVICE ValueAndPushforward<T, dT> exp_pushforward(T x, dT d_x) {
   return {::std::exp(x), ::std::exp(x) * d_x};
 }
 
-template <typename T>
-CUDA_HOST_DEVICE ValueAndPushforward<T, T> sin_pushforward(T x, T d_x) {
+template <typename T, typename dT>
+CUDA_HOST_DEVICE ValueAndPushforward<T, dT> sin_pushforward(T x, dT d_x) {
   return {::std::sin(x), ::std::cos(x) * d_x};
 }
 
-template <typename T>
-CUDA_HOST_DEVICE ValueAndPushforward<T, T> cos_pushforward(T x, T d_x) {
+template <typename T, typename dT>
+CUDA_HOST_DEVICE ValueAndPushforward<T, dT> cos_pushforward(T x, dT d_x) {
   return {::std::cos(x), (-1) * ::std::sin(x) * d_x};
 }
 
-template <typename T>
-CUDA_HOST_DEVICE ValueAndPushforward<T, T> sqrt_pushforward(T x, T d_x) {
+template <typename T, typename dT>
+CUDA_HOST_DEVICE ValueAndPushforward<T, dT> sqrt_pushforward(T x, dT d_x) {
   return {::std::sqrt(x), (((T)1) / (((T)2) * ::std::sqrt(x))) * d_x};
 }
 
@@ -232,9 +232,9 @@ CUDA_HOST_DEVICE ValueAndPushforward<T, T> floor_pushforward(T x, T /*d_x*/) {
   return {::std::floor(x), (T)0};
 }
 
-template <typename T>
-CUDA_HOST_DEVICE ValueAndPushforward<T, T> atan2_pushforward(T y, T x, T d_y,
-                                                             T d_x) {
+template <typename T, typename dT>
+CUDA_HOST_DEVICE ValueAndPushforward<T, dT> atan2_pushforward(T y, T x, dT d_y,
+                                                              dT d_x) {
   return {::std::atan2(y, x),
           -(y / ((x * x) + (y * y))) * d_x + x / ((x * x) + (y * y)) * d_y};
 }
@@ -246,8 +246,8 @@ CUDA_HOST_DEVICE void atan2_pullback(T y, T x, U d_z, T* d_y, T* d_x) {
   *d_x += -(y / ((x * x) + (y * y))) * d_z;
 }
 
-template <typename T>
-CUDA_HOST_DEVICE ValueAndPushforward<T, T> acos_pushforward(T x, T d_x) {
+template <typename T, typename dT>
+CUDA_HOST_DEVICE ValueAndPushforward<T, dT> acos_pushforward(T x, dT d_x) {
   return {::std::acos(x), ((-1) / (::std::sqrt(1 - x * x))) * d_x};
 }
 
@@ -270,10 +270,31 @@ pow_pushforward(T1 x, T2 exponent, T1 d_x, T2 d_exponent) {
   auto val = ::std::pow(x, exponent);
   auto derivative = (exponent * ::std::pow(x, exponent - 1)) * d_x;
   // Only add directional derivative of base^exp w.r.t exp if the directional
-  // seed d_exponent is non-zero. This is required because if base is less than or
-  // equal to 0, then log(base) is undefined, and therefore if user only requested
-  // directional derivative of base^exp w.r.t base -- which is valid --, the result would
-  // be undefined because as per C++ valid number + NaN * 0 = NaN.
+  // seed d_exponent is non-zero. This is required because if base is less than
+  // or equal to 0, then log(base) is undefined, and therefore if user only
+  // requested directional derivative of base^exp w.r.t base -- which is valid
+  // --, the result would be undefined because as per C++ valid number + NaN * 0
+  // = NaN.
+  if (d_exponent)
+    derivative += (::std::pow(x, exponent) * ::std::log(x)) * d_exponent;
+  return {val, derivative};
+}
+
+template <typename T1, typename T2>
+CUDA_HOST_DEVICE
+    ValueAndPushforward<decltype(::std::pow(T1(), T2())),
+                        clad::array<decltype(::std::pow(T1(), T2()))>>
+    pow_pushforward(T1 x, T2 exponent, clad::array<T1> d_x,
+                    clad::array<T2> d_exponent) {
+  decltype(::std::pow(T1(), T2())) val = ::std::pow(x, exponent);
+  clad::array<decltype(::std::pow(T1(), T2()))> derivative =
+      (exponent * ::std::pow(x, exponent - 1)) * d_x;
+  // Only add directional derivative of base^exp w.r.t exp if the directional
+  // seed d_exponent is non-zero. This is required because if base is less than
+  // or equal to 0, then log(base) is undefined, and therefore if user only
+  // requested directional derivative of base^exp w.r.t base -- which is valid
+  // --, the result would be undefined because as per C++ valid number + NaN * 0
+  // = NaN.
   if (d_exponent)
     derivative += (::std::pow(x, exponent) * ::std::log(x)) * d_exponent;
   return {val, derivative};

diff --git a/include/clad/Differentiator/Differentiator.h b/include/clad/Differentiator/Differentiator.h
@@ -624,12 +624,13 @@ CUDA_HOST_DEVICE T push(tape<T>& to, ArgsT... val) {
             typename F, typename DerivedFnType = JacobianDerivedFnTraits_t<F>,
             typename = typename std::enable_if<
                 !std::is_class<remove_reference_and_pointer_t<F>>::value>::type>
-  constexpr CladFunction<
-      DerivedFnType, ExtractFunctorTraits_t<F>> __attribute__((annotate("J")))
+  constexpr CladFunction<DerivedFnType, ExtractFunctorTraits_t<F>,
+                         /*EnablePadding=*/true> __attribute__((annotate("J")))
   jacobian(F f, ArgSpec args = "",
            DerivedFnType derivedFn = static_cast<DerivedFnType>(nullptr),
            const char* code = "") {
-    return CladFunction<DerivedFnType, ExtractFunctorTraits_t<F>>(
+    return CladFunction<DerivedFnType, ExtractFunctorTraits_t<F>,
+                        /*EnablePadding=*/true>(
         derivedFn /* will be replaced by Jacobian*/, code);
   }
 
@@ -640,12 +641,13 @@ CUDA_HOST_DEVICE T push(tape<T>& to, ArgsT... val) {
             typename F, typename DerivedFnType = JacobianDerivedFnTraits_t<F>,
             typename = typename std::enable_if<
                 std::is_class<remove_reference_and_pointer_t<F>>::value>::type>
-  constexpr CladFunction<
-      DerivedFnType, ExtractFunctorTraits_t<F>> __attribute__((annotate("J")))
+  constexpr CladFunction<DerivedFnType, ExtractFunctorTraits_t<F>,
+                         /*EnablePadding=*/true> __attribute__((annotate("J")))
   jacobian(F&& f, ArgSpec args = "",
            DerivedFnType derivedFn = static_cast<DerivedFnType>(nullptr),
            const char* code = "") {
-    return CladFunction<DerivedFnType, ExtractFunctorTraits_t<F>>(
+    return CladFunction<DerivedFnType, ExtractFunctorTraits_t<F>,
+                        /*EnablePadding=*/true>(
         derivedFn /* will be replaced by Jacobian*/, code, f);
   }