Commit 4dc2f7f

Merge branch 'master' into maa-lk-decom-fixes

maliasadi authored Jun 7, 2024
2 parents d4944fd + 1783b8b
Showing 19 changed files with 282 additions and 425 deletions.
12 changes: 12 additions & 0 deletions .github/CHANGELOG.md
@@ -1,6 +1,9 @@
# Release 0.37.0-dev

### New features since last release
* Add `inverse` support for gate operations in `lightning.tensor` in the C++ layer.
[(#753)](https://github.com/PennyLaneAI/pennylane-lightning/pull/753)
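  For context, the inverse (adjoint) of a unitary gate is its conjugate transpose; a minimal NumPy sketch of the relation this support relies on (illustrative only, not the C++ API):

  ```python
  import numpy as np

  # S gate as an example single-qubit unitary.
  S = np.array([[1, 0], [0, 1j]])

  # The inverse (adjoint) of a unitary is its conjugate transpose.
  S_inv = S.conj().T

  # A unitary times its adjoint is the identity.
  assert np.allclose(S @ S_inv, np.eye(2))
  ```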

* Add `observable` and `expval` support to `cutensornet` backed `lightning.tensor` C++ layer.
[(#728)](https://github.com/PennyLaneAI/pennylane-lightning/pull/728)
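  A hypothetical end-to-end usage once these C++ additions are surfaced in the Python layer (the device construction and gate set below are assumptions, not confirmed by this commit):

  ```python
  import pennylane as qml

  # Assumed Python-layer device; this commit only adds the C++ support.
  dev = qml.device("lightning.tensor", wires=2)

  @qml.qnode(dev)
  def circuit():
      qml.Hadamard(wires=0)
      qml.CNOT(wires=[0, 1])
      qml.adjoint(qml.S(wires=1))       # inverse gate application
      return qml.expval(qml.PauliZ(0))  # expval of an observable
  ```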

@@ -29,6 +32,12 @@

### Improvements

* Refactor C++ library names for `lightning.tensor`.
[(#755)](https://github.com/PennyLaneAI/pennylane-lightning/pull/755)

* Set `state_tensor` as `const` for the `MeasurementTNCuda` class.
[(#753)](https://github.com/PennyLaneAI/pennylane-lightning/pull/753)

* Update Kokkos version and support to 4.3.01.
[(#725)](https://github.com/PennyLaneAI/pennylane-lightning/pull/725)

@@ -67,6 +76,9 @@

### Bug fixes

* `lightning.qubit` and `lightning.kokkos` use `qml.ops.Conditional.base` instead of `qml.ops.Conditional.then_op`.
[(#752)](https://github.com/PennyLaneAI/pennylane-lightning/pull/752)
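  The `Conditional` wrapper arises from mid-circuit measurements; a minimal sketch of the affected pattern (the circuit itself is illustrative):

  ```python
  import pennylane as qml

  dev = qml.device("lightning.qubit", wires=2)

  @qml.qnode(dev)
  def circuit():
      qml.Hadamard(wires=0)
      m = qml.measure(0)  # mid-circuit measurement
      # qml.cond wraps PauliX in a qml.ops.Conditional; per this fix, the
      # devices now retrieve the wrapped operation via Conditional.base.
      qml.cond(m, qml.PauliX)(wires=1)
      return qml.expval(qml.PauliZ(1))
  ```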

* Fix AVX streaming operation support with newer GCC.
[(#729)](https://github.com/PennyLaneAI/pennylane-lightning/pull/729)

1 change: 1 addition & 0 deletions .github/workflows/tests_lkcuda_python.yml
@@ -102,6 +102,7 @@ jobs:
with:
key: ${{ matrix.os }}-kokkos${{ matrix.kokkos_version }}-${{ matrix.exec_model }}-${{ github.ref }}-${{ github.sha }}
path: ${{ github.workspace }}/Kokkos_install/${{ matrix.exec_model }}
retention-days: 1

pythontestswithKokkos:
needs: [builddeps]
@@ -19,7 +19,7 @@ findCutensornet(lightning_external_libs)

set(LTENSOR_MPS_FILES MPSTNCuda.cpp CACHE INTERNAL "" FORCE)

add_library(${PL_TENSOR} STATIC ${LTENSOR_MPS_FILES})
add_library(${PL_BACKEND} STATIC ${LTENSOR_MPS_FILES})

##########################
## Enforce C++ Standard ##
@@ -35,23 +35,23 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
#########################
option(PL_DISABLE_CUDA_SAFETY "Build without CUDA call safety checks" OFF)

target_link_libraries(${PL_TENSOR} PUBLIC lightning_compile_options
target_link_libraries(${PL_BACKEND} PUBLIC lightning_compile_options
lightning_external_libs
${PL_BACKEND}_gates
${PL_BACKEND}_tensor
${PL_BACKEND}_tensornetBase
${PL_BACKEND}_utils
)

target_include_directories(${PL_TENSOR} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
set_property(TARGET ${PL_TENSOR} PROPERTY POSITION_INDEPENDENT_CODE ON)
target_include_directories(${PL_BACKEND} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
set_property(TARGET ${PL_BACKEND} PROPERTY POSITION_INDEPENDENT_CODE ON)

# To avoid DSO errors on platforms preferring static linkage, uncomment the following line:
# string(REPLACE "libcudart_static.a" "libcudart.so" CUDA_SHARED_RT "${CUDA_LIBRARIES}")
set_target_properties(${PL_TENSOR} PROPERTIES INSTALL_RPATH "$ORIGIN/../cuquantum/lib:$ORIGIN/../cuquantum/lib64:$ORIGIN/")
set_target_properties(${PL_BACKEND} PROPERTIES INSTALL_RPATH "$ORIGIN/../cuquantum/lib:$ORIGIN/../cuquantum/lib64:$ORIGIN/")

if(PL_DISABLE_CUDA_SAFETY)
target_compile_options(${PL_TENSOR} INTERFACE $<$<COMPILE_LANGUAGE:CXX>:-DCUDA_UNSAFE>)
target_compile_options(${PL_BACKEND} INTERFACE $<$<COMPILE_LANGUAGE:CXX>:-DCUDA_UNSAFE>)
endif()

###############################################################################
@@ -82,6 +82,8 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
public:
MPSTNCuda() = delete;

// TODO: Add method to the constructor to allow users to select methods at
// runtime in the C++ layer
explicit MPSTNCuda(const std::size_t numQubits,
const std::size_t maxBondDim)
: BaseType(numQubits), maxBondDim_(maxBondDim),
@@ -90,6 +92,8 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
initTensors_();
}

// TODO: Add method to the constructor to allow users to select methods at
// runtime in the C++ layer
explicit MPSTNCuda(const std::size_t numQubits,
const std::size_t maxBondDim, DevTag<int> dev_tag)
: BaseType(numQubits, dev_tag), maxBondDim_(maxBondDim),
@@ -72,6 +72,8 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
public:
TNCudaBase() = delete;

// TODO: Add method to the constructor to allow users to select methods at
// runtime in the C++ layer
explicit TNCudaBase(const std::size_t numQubits, int device_id = 0,
cudaStream_t stream_id = 0)
: BaseType(numQubits), handle_(make_shared_tncuda_handle()),
@@ -98,6 +100,8 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
/* cutensornetState_t * */ &quantumState_));
}

// TODO: Add method to the constructor to allow users to select methods at
// runtime in the C++ layer
explicit TNCudaBase(const std::size_t numQubits, DevTag<int> dev_tag)
: BaseType(numQubits), handle_(make_shared_tncuda_handle()),
dev_tag_(dev_tag),
@@ -236,6 +240,13 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
bool adjoint = false,
const std::vector<PrecisionT> &params = {0.0},
const std::vector<ComplexT> &gate_matrix = {}) {
// TODO: Need to revisit this line of code for the exact TN backend.
// We should be able to turn on or skip this check based on the backend,
// e.g. if (getMethod() == "mps") { ... }
PL_ABORT_IF(
wires.size() > 2,
"Unsupported gate: MPS method only supports 1, 2-wires gates");

auto &&par = (params.empty()) ? std::vector<PrecisionT>{0.0} : params;
DataBuffer<PrecisionT, int> dummy_device_data(
Pennylane::Util::exp2(wires.size()), getDevTag());
@@ -259,17 +270,18 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
/* void * */ static_cast<void *>(dummy_device_data.getData()),
/* const int64_t *tensorModeStrides */ nullptr,
/* const int32_t immutable */ 1,
/* const int32_t adjoint */ adjoint,
/* const int32_t adjoint */ 0,
/* const int32_t unitary */ 1,
/* int64_t * */ &id));
if (!gate_matrix.empty()) {
auto gate_key = std::make_pair(opName, par);
std::vector<CFP_t> matrix_cu =
cuUtil::complexToCu<ComplexT>(gate_matrix);
gate_cache_->add_gate(static_cast<std::size_t>(id), gate_key,
matrix_cu);
matrix_cu, adjoint);
} else {
gate_cache_->add_gate(static_cast<std::size_t>(id), opName, par);
gate_cache_->add_gate(static_cast<std::size_t>(id), opName, par,
adjoint);
}
PL_CUTENSORNET_IS_SUCCESS(cutensornetStateUpdateTensorOperator(
/* const cutensornetHandle_t */ getTNCudaHandle(),
@@ -20,7 +20,7 @@ target_link_libraries(tensornet_base_tests INTERFACE Catch2::Catch2)
ProcessTestOptions(tensornet_base_tests)

# Create dependency on the dynamically defined simulator/backend target.
target_link_libraries(tensornet_base_tests INTERFACE ${PL_TENSOR} ${PL_TENSOR}_utils)
target_link_libraries(tensornet_base_tests INTERFACE ${PL_BACKEND} ${PL_BACKEND}_tncuda_utils)

target_sources(tensornet_base_tests INTERFACE runner_lightning_tensor_tensornetBase.cpp)

@@ -73,15 +73,19 @@ template <class PrecisionT> class TNCudaGateCache {
* @param gate_name String representing the name of the given gate.
* @param gate_param Vector of parameter values. `{}` if non-parametric
* gate.
* @param adjoint Boolean value indicating whether the adjoint of the gate
* is to be appended. The default is false.
*/
void add_gate(const std::size_t gate_id, const std::string &gate_name,
[[maybe_unused]] std::vector<PrecisionT> gate_param = {}) {
[[maybe_unused]] std::vector<PrecisionT> gate_param = {},
bool adjoint = false) {
auto gate_key = std::make_pair(gate_name, gate_param);

auto &gateMap =
cuGates::DynamicGateDataAccess<PrecisionT>::getInstance();

add_gate(gate_id, gate_key, gateMap.getGateData(gate_name, gate_param));
add_gate(gate_id, gate_key, gateMap.getGateData(gate_name, gate_param),
adjoint);
}
/**
* @brief Add gate numerical value to the cache, indexed by the id of gate
@@ -93,10 +97,12 @@ template <class PrecisionT> class TNCudaGateCache {
* its associated parameter value.
* @param gate_data_host Vector of complex floating point values
* representing the gate data on host.
* @param adjoint Boolean value indicating whether the adjoint of the gate
* is to be appended. The default is false.
*/

void add_gate(const std::size_t gate_id, gate_key_info gate_key,
const std::vector<CFP_t> &gate_data_host) {
const std::vector<CFP_t> &gate_data_host,
bool adjoint = false) {
const std::size_t rank = Pennylane::Util::log2(gate_data_host.size());
auto modes = std::vector<std::size_t>(rank, 0);
auto extents = std::vector<std::size_t>(rank, 2);
Expand All @@ -108,8 +114,34 @@ template <class PrecisionT> class TNCudaGateCache {
std::piecewise_construct, std::forward_as_tuple(gate_id),
std::forward_as_tuple(gate_key, std::move(tensor)));

device_gates_.at(gate_id).second.getDataBuffer().CopyHostDataToGpu(
gate_data_host.data(), gate_data_host.size());
if (adjoint) {
// TODO: This is a temporary solution for the gate-data transpose.
// There should be a better way to handle this, but it is not a big
// performance issue for now since gate matrices are small.
// TODO: The implementation here can be optimized by generating the
// data buffer directly on the device instead of performing the
// transpose on the host.
std::vector<CFP_t> data_host_transpose(gate_data_host.size());

const std::size_t col_size = 1 << (rank / 2);
const std::size_t row_size = col_size;

PL_ASSERT(col_size * row_size == gate_data_host.size());

for (std::size_t idx = 0; idx < gate_data_host.size(); idx++) {
std::size_t col = idx / row_size;
std::size_t row = idx % row_size;

data_host_transpose.at(row * col_size + col) = {
gate_data_host.at(idx).x, -gate_data_host.at(idx).y};
}

device_gates_.at(gate_id).second.getDataBuffer().CopyHostDataToGpu(
data_host_transpose.data(), data_host_transpose.size());
} else {
device_gates_.at(gate_id).second.getDataBuffer().CopyHostDataToGpu(
gate_data_host.data(), gate_data_host.size());
}

total_alloc_bytes_ += (sizeof(CFP_t) * gate_data_host.size());
}
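Note the design choice here: in `applyOperation` above, the adjoint flag handed to the cutensornet call is fixed at `0`, and the adjoint is instead folded into the cached gate data by the host-side conjugate transpose in this method. A NumPy sketch of the same flat-index arithmetic (illustrative only, not part of the C++ code):

```python
import numpy as np

def conjugate_transpose_flat(gate_data, dim):
    """Mirror of the host-side loop: swap row/col in the flat index and
    negate the imaginary part."""
    out = np.empty(dim * dim, dtype=complex)
    for idx, val in enumerate(gate_data):
        col, row = idx // dim, idx % dim
        out[row * dim + col] = val.conjugate()
    return out

# Check against NumPy's conjugate transpose for a 2x2 example.
mat = np.array([[1, 0], [0, 1j]])  # S gate
flat = conjugate_transpose_flat(mat.flatten(), 2)
assert np.allclose(flat.reshape(2, 2), mat.conj().T)
```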
@@ -17,7 +17,7 @@ FetchAndIncludeCatch()
add_library(${PL_BACKEND}_gates_tests INTERFACE)
target_link_libraries(${PL_BACKEND}_gates_tests INTERFACE Catch2::Catch2
${PL_BACKEND}_gates
${PL_TENSOR}
${PL_BACKEND}
)

ProcessTestOptions(${PL_BACKEND}_gates_tests)